1*38fd1498Szrj /* Redundant Extension Elimination pass for the GNU compiler.
2*38fd1498Szrj Copyright (C) 2010-2018 Free Software Foundation, Inc.
3*38fd1498Szrj Contributed by Ilya Enkovich (ilya.enkovich@intel.com)
4*38fd1498Szrj
5*38fd1498Szrj Based on the Redundant Zero-extension elimination pass contributed by
6*38fd1498Szrj Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com).
7*38fd1498Szrj
8*38fd1498Szrj This file is part of GCC.
9*38fd1498Szrj
10*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
11*38fd1498Szrj the terms of the GNU General Public License as published by the Free
12*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
13*38fd1498Szrj version.
14*38fd1498Szrj
15*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
17*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18*38fd1498Szrj for more details.
19*38fd1498Szrj
20*38fd1498Szrj You should have received a copy of the GNU General Public License
21*38fd1498Szrj along with GCC; see the file COPYING3. If not see
22*38fd1498Szrj <http://www.gnu.org/licenses/>. */
23*38fd1498Szrj
24*38fd1498Szrj
25*38fd1498Szrj /* Problem Description :
26*38fd1498Szrj --------------------
27*38fd1498Szrj This pass is intended to remove redundant extension instructions.
28*38fd1498Szrj Such instructions appear for different reasons. We expect some of
29*38fd1498Szrj them due to implicit zero-extension in 64-bit registers after writing
30*38fd1498Szrj to their lower 32-bit half (e.g. for the x86-64 architecture).
31*38fd1498Szrj Another possible reason is a type cast which follows a load (for
32*38fd1498Szrj instance a register restore) and which can be combined into a single
33*38fd1498Szrj instruction, and for which earlier local passes, e.g. the combiner,
34*38fd1498Szrj weren't able to optimize.
35*38fd1498Szrj
36*38fd1498Szrj How does this pass work ?
37*38fd1498Szrj --------------------------
38*38fd1498Szrj
39*38fd1498Szrj This pass is run after register allocation. Hence, all registers that
40*38fd1498Szrj this pass deals with are hard registers. This pass first looks for an
41*38fd1498Szrj extension instruction that could possibly be redundant. Such extension
42*38fd1498Szrj instructions show up in RTL with the pattern :
43*38fd1498Szrj (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))),
44*38fd1498Szrj where x can be any hard register.
45*38fd1498Szrj Now, this pass tries to eliminate this instruction by merging the
46*38fd1498Szrj extension with the definitions of register x. For instance, if
47*38fd1498Szrj one of the definitions of register x was :
48*38fd1498Szrj (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
49*38fd1498Szrj followed by extension :
50*38fd1498Szrj (set (reg:DI x) (zero_extend:DI (reg:SI x)))
51*38fd1498Szrj then the combination converts this into :
52*38fd1498Szrj (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
53*38fd1498Szrj If all the merged definitions are recognizable assembly instructions,
54*38fd1498Szrj the extension is effectively eliminated.
55*38fd1498Szrj
   For example, for the x86-64 architecture, implicit zero-extensions
   are captured with appropriate patterns in the i386.md file.  Hence,
   these merged definitions can be matched to a single assembly instruction.
   The original extension instruction is then deleted if all the
   definitions can be merged.
61*38fd1498Szrj
62*38fd1498Szrj However, there are cases where the definition instruction cannot be
63*38fd1498Szrj merged with an extension. Examples are CALL instructions. In such
64*38fd1498Szrj cases, the original extension is not redundant and this pass does
65*38fd1498Szrj not delete it.
66*38fd1498Szrj
67*38fd1498Szrj Handling conditional moves :
68*38fd1498Szrj ----------------------------
69*38fd1498Szrj
70*38fd1498Szrj Architectures like x86-64 support conditional moves whose semantics for
71*38fd1498Szrj extension differ from the other instructions. For instance, the
72*38fd1498Szrj instruction *cmov ebx, eax*
73*38fd1498Szrj zero-extends eax onto rax only when the move from ebx to eax happens.
74*38fd1498Szrj Otherwise, eax may not be zero-extended. Consider conditional moves as
75*38fd1498Szrj RTL instructions of the form
76*38fd1498Szrj (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
77*38fd1498Szrj This pass tries to merge an extension with a conditional move by
78*38fd1498Szrj actually merging the definitions of y and z with an extension and then
79*38fd1498Szrj converting the conditional move into :
80*38fd1498Szrj (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
81*38fd1498Szrj Since registers y and z are extended, register x will also be extended
82*38fd1498Szrj after the conditional move. Note that this step has to be done
83*38fd1498Szrj transitively since the definition of a conditional copy can be
84*38fd1498Szrj another conditional copy.
85*38fd1498Szrj
86*38fd1498Szrj Motivating Example I :
87*38fd1498Szrj ---------------------
88*38fd1498Szrj For this program :
89*38fd1498Szrj **********************************************
90*38fd1498Szrj bad_code.c
91*38fd1498Szrj
92*38fd1498Szrj int mask[1000];
93*38fd1498Szrj
94*38fd1498Szrj int foo(unsigned x)
95*38fd1498Szrj {
96*38fd1498Szrj if (x < 10)
97*38fd1498Szrj x = x * 45;
98*38fd1498Szrj else
99*38fd1498Szrj x = x * 78;
100*38fd1498Szrj return mask[x];
101*38fd1498Szrj }
102*38fd1498Szrj **********************************************
103*38fd1498Szrj
104*38fd1498Szrj $ gcc -O2 bad_code.c
105*38fd1498Szrj ........
106*38fd1498Szrj 400315: b8 4e 00 00 00 mov $0x4e,%eax
107*38fd1498Szrj 40031a: 0f af f8 imul %eax,%edi
108*38fd1498Szrj 40031d: 89 ff mov %edi,%edi - useless extension
109*38fd1498Szrj 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
110*38fd1498Szrj 400326: c3 retq
111*38fd1498Szrj ......
112*38fd1498Szrj 400330: ba 2d 00 00 00 mov $0x2d,%edx
113*38fd1498Szrj 400335: 0f af fa imul %edx,%edi
114*38fd1498Szrj 400338: 89 ff mov %edi,%edi - useless extension
115*38fd1498Szrj 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
116*38fd1498Szrj 400341: c3 retq
117*38fd1498Szrj
118*38fd1498Szrj $ gcc -O2 -free bad_code.c
119*38fd1498Szrj ......
120*38fd1498Szrj 400315: 6b ff 4e imul $0x4e,%edi,%edi
121*38fd1498Szrj 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
122*38fd1498Szrj 40031f: c3 retq
123*38fd1498Szrj 400320: 6b ff 2d imul $0x2d,%edi,%edi
124*38fd1498Szrj 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
125*38fd1498Szrj 40032a: c3 retq
126*38fd1498Szrj
127*38fd1498Szrj Motivating Example II :
128*38fd1498Szrj ---------------------
129*38fd1498Szrj
130*38fd1498Szrj Here is an example with a conditional move.
131*38fd1498Szrj
132*38fd1498Szrj For this program :
133*38fd1498Szrj **********************************************
134*38fd1498Szrj
135*38fd1498Szrj unsigned long long foo(unsigned x , unsigned y)
136*38fd1498Szrj {
137*38fd1498Szrj unsigned z;
138*38fd1498Szrj if (x > 100)
139*38fd1498Szrj z = x + y;
140*38fd1498Szrj else
141*38fd1498Szrj z = x - y;
142*38fd1498Szrj return (unsigned long long)(z);
143*38fd1498Szrj }
144*38fd1498Szrj
145*38fd1498Szrj $ gcc -O2 bad_code.c
146*38fd1498Szrj ............
147*38fd1498Szrj 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx
148*38fd1498Szrj 400363: 89 f8 mov %edi,%eax
149*38fd1498Szrj 400365: 29 f0 sub %esi,%eax
150*38fd1498Szrj 400367: 83 ff 65 cmp $0x65,%edi
151*38fd1498Szrj 40036a: 0f 43 c2 cmovae %edx,%eax
152*38fd1498Szrj 40036d: 89 c0 mov %eax,%eax - useless extension
153*38fd1498Szrj 40036f: c3 retq
154*38fd1498Szrj
155*38fd1498Szrj $ gcc -O2 -free bad_code.c
156*38fd1498Szrj .............
157*38fd1498Szrj 400360: 89 fa mov %edi,%edx
158*38fd1498Szrj 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax
159*38fd1498Szrj 400365: 29 f2 sub %esi,%edx
160*38fd1498Szrj 400367: 83 ff 65 cmp $0x65,%edi
161*38fd1498Szrj 40036a: 89 d6 mov %edx,%esi
162*38fd1498Szrj 40036c: 48 0f 42 c6 cmovb %rsi,%rax
163*38fd1498Szrj 400370: c3 retq
164*38fd1498Szrj
165*38fd1498Szrj Motivating Example III :
166*38fd1498Szrj ---------------------
167*38fd1498Szrj
168*38fd1498Szrj Here is an example with a type cast.
169*38fd1498Szrj
170*38fd1498Szrj For this program :
171*38fd1498Szrj **********************************************
172*38fd1498Szrj
173*38fd1498Szrj void test(int size, unsigned char *in, unsigned char *out)
174*38fd1498Szrj {
175*38fd1498Szrj int i;
176*38fd1498Szrj unsigned char xr, xg, xy=0;
177*38fd1498Szrj
178*38fd1498Szrj for (i = 0; i < size; i++) {
179*38fd1498Szrj xr = *in++;
180*38fd1498Szrj xg = *in++;
181*38fd1498Szrj xy = (unsigned char) ((19595*xr + 38470*xg) >> 16);
182*38fd1498Szrj *out++ = xy;
183*38fd1498Szrj }
184*38fd1498Szrj }
185*38fd1498Szrj
186*38fd1498Szrj $ gcc -O2 bad_code.c
187*38fd1498Szrj ............
188*38fd1498Szrj 10: 0f b6 0e movzbl (%rsi),%ecx
189*38fd1498Szrj 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax
190*38fd1498Szrj 17: 48 83 c6 02 add $0x2,%rsi
191*38fd1498Szrj 1b: 0f b6 c9 movzbl %cl,%ecx - useless extension
192*38fd1498Szrj 1e: 0f b6 c0 movzbl %al,%eax - useless extension
193*38fd1498Szrj 21: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx
194*38fd1498Szrj 27: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax
195*38fd1498Szrj
196*38fd1498Szrj $ gcc -O2 -free bad_code.c
197*38fd1498Szrj .............
198*38fd1498Szrj 10: 0f b6 0e movzbl (%rsi),%ecx
199*38fd1498Szrj 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax
200*38fd1498Szrj 17: 48 83 c6 02 add $0x2,%rsi
201*38fd1498Szrj 1b: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx
202*38fd1498Szrj 21: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax
203*38fd1498Szrj
204*38fd1498Szrj Usefulness :
205*38fd1498Szrj ----------
206*38fd1498Szrj
207*38fd1498Szrj The original redundant zero-extension elimination pass reported reduction
208*38fd1498Szrj of the dynamic instruction count of a compression benchmark by 2.8% and
209*38fd1498Szrj improvement of its run time by about 1%.
210*38fd1498Szrj
211*38fd1498Szrj The additional performance gain with the enhanced pass is mostly expected
212*38fd1498Szrj on in-order architectures where redundancy cannot be compensated by out of
213*38fd1498Szrj order execution. Measurements showed up to 10% performance gain (reduced
214*38fd1498Szrj run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance
215*38fd1498Szrj gain 1%. */
216*38fd1498Szrj
217*38fd1498Szrj
218*38fd1498Szrj #include "config.h"
219*38fd1498Szrj #include "system.h"
220*38fd1498Szrj #include "coretypes.h"
221*38fd1498Szrj #include "backend.h"
222*38fd1498Szrj #include "target.h"
223*38fd1498Szrj #include "rtl.h"
224*38fd1498Szrj #include "tree.h"
225*38fd1498Szrj #include "df.h"
226*38fd1498Szrj #include "memmodel.h"
227*38fd1498Szrj #include "tm_p.h"
228*38fd1498Szrj #include "optabs.h"
229*38fd1498Szrj #include "regs.h"
230*38fd1498Szrj #include "emit-rtl.h"
231*38fd1498Szrj #include "recog.h"
232*38fd1498Szrj #include "cfgrtl.h"
233*38fd1498Szrj #include "expr.h"
234*38fd1498Szrj #include "tree-pass.h"
235*38fd1498Szrj
236*38fd1498Szrj /* This structure represents a candidate for elimination. */
237*38fd1498Szrj
238*38fd1498Szrj struct ext_cand
239*38fd1498Szrj {
240*38fd1498Szrj /* The expression. */
241*38fd1498Szrj const_rtx expr;
242*38fd1498Szrj
243*38fd1498Szrj /* The kind of extension. */
244*38fd1498Szrj enum rtx_code code;
245*38fd1498Szrj
246*38fd1498Szrj /* The destination mode. */
247*38fd1498Szrj machine_mode mode;
248*38fd1498Szrj
249*38fd1498Szrj /* The instruction where it lives. */
250*38fd1498Szrj rtx_insn *insn;
251*38fd1498Szrj };
252*38fd1498Szrj
253*38fd1498Szrj
/* Exclusive upper bound on the INSN_UID of any insn this pass looks at.
   Per-insn scratch arrays indexed by UID are allocated with this many
   entries.  It is assigned outside the part of the file shown around
   this declaration.  */
static int max_insn_uid;
255*38fd1498Szrj
256*38fd1498Szrj /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN. */
257*38fd1498Szrj
258*38fd1498Szrj static bool
update_reg_equal_equiv_notes(rtx_insn * insn,machine_mode new_mode,machine_mode old_mode,enum rtx_code code)259*38fd1498Szrj update_reg_equal_equiv_notes (rtx_insn *insn, machine_mode new_mode,
260*38fd1498Szrj machine_mode old_mode, enum rtx_code code)
261*38fd1498Szrj {
262*38fd1498Szrj rtx *loc = ®_NOTES (insn);
263*38fd1498Szrj while (*loc)
264*38fd1498Szrj {
265*38fd1498Szrj enum reg_note kind = REG_NOTE_KIND (*loc);
266*38fd1498Szrj if (kind == REG_EQUAL || kind == REG_EQUIV)
267*38fd1498Szrj {
268*38fd1498Szrj rtx orig_src = XEXP (*loc, 0);
269*38fd1498Szrj /* Update equivalency constants. Recall that RTL constants are
270*38fd1498Szrj sign-extended. */
271*38fd1498Szrj if (GET_CODE (orig_src) == CONST_INT
272*38fd1498Szrj && HWI_COMPUTABLE_MODE_P (new_mode))
273*38fd1498Szrj {
274*38fd1498Szrj if (INTVAL (orig_src) >= 0 || code == SIGN_EXTEND)
275*38fd1498Szrj /* Nothing needed. */;
276*38fd1498Szrj else
277*38fd1498Szrj {
278*38fd1498Szrj /* Zero-extend the negative constant by masking out the
279*38fd1498Szrj bits outside the source mode. */
280*38fd1498Szrj rtx new_const_int
281*38fd1498Szrj = gen_int_mode (INTVAL (orig_src)
282*38fd1498Szrj & GET_MODE_MASK (old_mode),
283*38fd1498Szrj new_mode);
284*38fd1498Szrj if (!validate_change (insn, &XEXP (*loc, 0),
285*38fd1498Szrj new_const_int, true))
286*38fd1498Szrj return false;
287*38fd1498Szrj }
288*38fd1498Szrj loc = &XEXP (*loc, 1);
289*38fd1498Szrj }
290*38fd1498Szrj /* Drop all other notes, they assume a wrong mode. */
291*38fd1498Szrj else if (!validate_change (insn, loc, XEXP (*loc, 1), true))
292*38fd1498Szrj return false;
293*38fd1498Szrj }
294*38fd1498Szrj else
295*38fd1498Szrj loc = &XEXP (*loc, 1);
296*38fd1498Szrj }
297*38fd1498Szrj return true;
298*38fd1498Szrj }
299*38fd1498Szrj
300*38fd1498Szrj /* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
301*38fd1498Szrj and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
302*38fd1498Szrj this code modifies the SET rtx to a new SET rtx that extends the
303*38fd1498Szrj right hand expression into a register on the left hand side. Note
304*38fd1498Szrj that multiple assumptions are made about the nature of the set that
305*38fd1498Szrj needs to be true for this to work and is called from merge_def_and_ext.
306*38fd1498Szrj
307*38fd1498Szrj Original :
308*38fd1498Szrj (set (reg a) (expression))
309*38fd1498Szrj
310*38fd1498Szrj Transform :
311*38fd1498Szrj (set (reg a) (any_extend (expression)))
312*38fd1498Szrj
313*38fd1498Szrj Special Cases :
314*38fd1498Szrj If the expression is a constant or another extension, then directly
315*38fd1498Szrj assign it to the register. */
316*38fd1498Szrj
317*38fd1498Szrj static bool
combine_set_extension(ext_cand * cand,rtx_insn * curr_insn,rtx * orig_set)318*38fd1498Szrj combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
319*38fd1498Szrj {
320*38fd1498Szrj rtx orig_src = SET_SRC (*orig_set);
321*38fd1498Szrj machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
322*38fd1498Szrj rtx new_set;
323*38fd1498Szrj rtx cand_pat = PATTERN (cand->insn);
324*38fd1498Szrj
325*38fd1498Szrj /* If the extension's source/destination registers are not the same
326*38fd1498Szrj then we need to change the original load to reference the destination
327*38fd1498Szrj of the extension. Then we need to emit a copy from that destination
328*38fd1498Szrj to the original destination of the load. */
329*38fd1498Szrj rtx new_reg;
330*38fd1498Szrj bool copy_needed
331*38fd1498Szrj = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
332*38fd1498Szrj if (copy_needed)
333*38fd1498Szrj new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat)));
334*38fd1498Szrj else
335*38fd1498Szrj new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
336*38fd1498Szrj
337*38fd1498Szrj /* Merge constants by directly moving the constant into the register under
338*38fd1498Szrj some conditions. Recall that RTL constants are sign-extended. */
339*38fd1498Szrj if (GET_CODE (orig_src) == CONST_INT
340*38fd1498Szrj && HWI_COMPUTABLE_MODE_P (cand->mode))
341*38fd1498Szrj {
342*38fd1498Szrj if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND)
343*38fd1498Szrj new_set = gen_rtx_SET (new_reg, orig_src);
344*38fd1498Szrj else
345*38fd1498Szrj {
346*38fd1498Szrj /* Zero-extend the negative constant by masking out the bits outside
347*38fd1498Szrj the source mode. */
348*38fd1498Szrj rtx new_const_int
349*38fd1498Szrj = gen_int_mode (INTVAL (orig_src) & GET_MODE_MASK (orig_mode),
350*38fd1498Szrj GET_MODE (new_reg));
351*38fd1498Szrj new_set = gen_rtx_SET (new_reg, new_const_int);
352*38fd1498Szrj }
353*38fd1498Szrj }
354*38fd1498Szrj else if (GET_MODE (orig_src) == VOIDmode)
355*38fd1498Szrj {
356*38fd1498Szrj /* This is mostly due to a call insn that should not be optimized. */
357*38fd1498Szrj return false;
358*38fd1498Szrj }
359*38fd1498Szrj else if (GET_CODE (orig_src) == cand->code)
360*38fd1498Szrj {
361*38fd1498Szrj /* Here is a sequence of two extensions. Try to merge them. */
362*38fd1498Szrj rtx temp_extension
363*38fd1498Szrj = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
364*38fd1498Szrj rtx simplified_temp_extension = simplify_rtx (temp_extension);
365*38fd1498Szrj if (simplified_temp_extension)
366*38fd1498Szrj temp_extension = simplified_temp_extension;
367*38fd1498Szrj new_set = gen_rtx_SET (new_reg, temp_extension);
368*38fd1498Szrj }
369*38fd1498Szrj else if (GET_CODE (orig_src) == IF_THEN_ELSE)
370*38fd1498Szrj {
371*38fd1498Szrj /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise,
372*38fd1498Szrj in general, IF_THEN_ELSE should not be combined. */
373*38fd1498Szrj return false;
374*38fd1498Szrj }
375*38fd1498Szrj else
376*38fd1498Szrj {
377*38fd1498Szrj /* This is the normal case. */
378*38fd1498Szrj rtx temp_extension
379*38fd1498Szrj = gen_rtx_fmt_e (cand->code, cand->mode, orig_src);
380*38fd1498Szrj rtx simplified_temp_extension = simplify_rtx (temp_extension);
381*38fd1498Szrj if (simplified_temp_extension)
382*38fd1498Szrj temp_extension = simplified_temp_extension;
383*38fd1498Szrj new_set = gen_rtx_SET (new_reg, temp_extension);
384*38fd1498Szrj }
385*38fd1498Szrj
386*38fd1498Szrj /* This change is a part of a group of changes. Hence,
387*38fd1498Szrj validate_change will not try to commit the change. */
388*38fd1498Szrj if (validate_change (curr_insn, orig_set, new_set, true)
389*38fd1498Szrj && update_reg_equal_equiv_notes (curr_insn, cand->mode, orig_mode,
390*38fd1498Szrj cand->code))
391*38fd1498Szrj {
392*38fd1498Szrj if (dump_file)
393*38fd1498Szrj {
394*38fd1498Szrj fprintf (dump_file,
395*38fd1498Szrj "Tentatively merged extension with definition %s:\n",
396*38fd1498Szrj (copy_needed) ? "(copy needed)" : "");
397*38fd1498Szrj print_rtl_single (dump_file, curr_insn);
398*38fd1498Szrj }
399*38fd1498Szrj return true;
400*38fd1498Szrj }
401*38fd1498Szrj
402*38fd1498Szrj return false;
403*38fd1498Szrj }
404*38fd1498Szrj
405*38fd1498Szrj /* Treat if_then_else insns, where the operands of both branches
406*38fd1498Szrj are registers, as copies. For instance,
407*38fd1498Szrj Original :
408*38fd1498Szrj (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
409*38fd1498Szrj Transformed :
410*38fd1498Szrj (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
411*38fd1498Szrj DEF_INSN is the if_then_else insn. */
412*38fd1498Szrj
413*38fd1498Szrj static bool
transform_ifelse(ext_cand * cand,rtx_insn * def_insn)414*38fd1498Szrj transform_ifelse (ext_cand *cand, rtx_insn *def_insn)
415*38fd1498Szrj {
416*38fd1498Szrj rtx set_insn = PATTERN (def_insn);
417*38fd1498Szrj rtx srcreg, dstreg, srcreg2;
418*38fd1498Szrj rtx map_srcreg, map_dstreg, map_srcreg2;
419*38fd1498Szrj rtx ifexpr;
420*38fd1498Szrj rtx cond;
421*38fd1498Szrj rtx new_set;
422*38fd1498Szrj
423*38fd1498Szrj gcc_assert (GET_CODE (set_insn) == SET);
424*38fd1498Szrj
425*38fd1498Szrj cond = XEXP (SET_SRC (set_insn), 0);
426*38fd1498Szrj dstreg = SET_DEST (set_insn);
427*38fd1498Szrj srcreg = XEXP (SET_SRC (set_insn), 1);
428*38fd1498Szrj srcreg2 = XEXP (SET_SRC (set_insn), 2);
429*38fd1498Szrj /* If the conditional move already has the right or wider mode,
430*38fd1498Szrj there is nothing to do. */
431*38fd1498Szrj if (GET_MODE_UNIT_SIZE (GET_MODE (dstreg))
432*38fd1498Szrj >= GET_MODE_UNIT_SIZE (cand->mode))
433*38fd1498Szrj return true;
434*38fd1498Szrj
435*38fd1498Szrj map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg));
436*38fd1498Szrj map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2));
437*38fd1498Szrj map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg));
438*38fd1498Szrj ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2);
439*38fd1498Szrj new_set = gen_rtx_SET (map_dstreg, ifexpr);
440*38fd1498Szrj
441*38fd1498Szrj if (validate_change (def_insn, &PATTERN (def_insn), new_set, true)
442*38fd1498Szrj && update_reg_equal_equiv_notes (def_insn, cand->mode, GET_MODE (dstreg),
443*38fd1498Szrj cand->code))
444*38fd1498Szrj {
445*38fd1498Szrj if (dump_file)
446*38fd1498Szrj {
447*38fd1498Szrj fprintf (dump_file,
448*38fd1498Szrj "Mode of conditional move instruction extended:\n");
449*38fd1498Szrj print_rtl_single (dump_file, def_insn);
450*38fd1498Szrj }
451*38fd1498Szrj return true;
452*38fd1498Szrj }
453*38fd1498Szrj
454*38fd1498Szrj return false;
455*38fd1498Szrj }
456*38fd1498Szrj
457*38fd1498Szrj /* Get all the reaching definitions of an instruction. The definitions are
458*38fd1498Szrj desired for REG used in INSN. Return the definition list or NULL if a
459*38fd1498Szrj definition is missing. If DEST is non-NULL, additionally push the INSN
460*38fd1498Szrj of the definitions onto DEST. */
461*38fd1498Szrj
462*38fd1498Szrj static struct df_link *
get_defs(rtx_insn * insn,rtx reg,vec<rtx_insn * > * dest)463*38fd1498Szrj get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
464*38fd1498Szrj {
465*38fd1498Szrj df_ref use;
466*38fd1498Szrj struct df_link *ref_chain, *ref_link;
467*38fd1498Szrj
468*38fd1498Szrj FOR_EACH_INSN_USE (use, insn)
469*38fd1498Szrj {
470*38fd1498Szrj if (GET_CODE (DF_REF_REG (use)) == SUBREG)
471*38fd1498Szrj return NULL;
472*38fd1498Szrj if (REGNO (DF_REF_REG (use)) == REGNO (reg))
473*38fd1498Szrj break;
474*38fd1498Szrj }
475*38fd1498Szrj
476*38fd1498Szrj gcc_assert (use != NULL);
477*38fd1498Szrj
478*38fd1498Szrj ref_chain = DF_REF_CHAIN (use);
479*38fd1498Szrj
480*38fd1498Szrj for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
481*38fd1498Szrj {
482*38fd1498Szrj /* Problem getting some definition for this instruction. */
483*38fd1498Szrj if (ref_link->ref == NULL)
484*38fd1498Szrj return NULL;
485*38fd1498Szrj if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
486*38fd1498Szrj return NULL;
487*38fd1498Szrj /* As global regs are assumed to be defined at each function call
488*38fd1498Szrj dataflow can report a call_insn as being a definition of REG.
489*38fd1498Szrj But we can't do anything with that in this pass so proceed only
490*38fd1498Szrj if the instruction really sets REG in a way that can be deduced
491*38fd1498Szrj from the RTL structure. */
492*38fd1498Szrj if (global_regs[REGNO (reg)]
493*38fd1498Szrj && !set_of (reg, DF_REF_INSN (ref_link->ref)))
494*38fd1498Szrj return NULL;
495*38fd1498Szrj }
496*38fd1498Szrj
497*38fd1498Szrj if (dest)
498*38fd1498Szrj for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
499*38fd1498Szrj dest->safe_push (DF_REF_INSN (ref_link->ref));
500*38fd1498Szrj
501*38fd1498Szrj return ref_chain;
502*38fd1498Szrj }
503*38fd1498Szrj
504*38fd1498Szrj /* Get all the reaching uses of an instruction. The uses are desired for REG
505*38fd1498Szrj set in INSN. Return use list or NULL if a use is missing or irregular. */
506*38fd1498Szrj
507*38fd1498Szrj static struct df_link *
get_uses(rtx_insn * insn,rtx reg)508*38fd1498Szrj get_uses (rtx_insn *insn, rtx reg)
509*38fd1498Szrj {
510*38fd1498Szrj df_ref def;
511*38fd1498Szrj struct df_link *ref_chain, *ref_link;
512*38fd1498Szrj
513*38fd1498Szrj FOR_EACH_INSN_DEF (def, insn)
514*38fd1498Szrj if (REGNO (DF_REF_REG (def)) == REGNO (reg))
515*38fd1498Szrj break;
516*38fd1498Szrj
517*38fd1498Szrj gcc_assert (def != NULL);
518*38fd1498Szrj
519*38fd1498Szrj ref_chain = DF_REF_CHAIN (def);
520*38fd1498Szrj
521*38fd1498Szrj for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
522*38fd1498Szrj {
523*38fd1498Szrj /* Problem getting some use for this instruction. */
524*38fd1498Szrj if (ref_link->ref == NULL)
525*38fd1498Szrj return NULL;
526*38fd1498Szrj if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR)
527*38fd1498Szrj return NULL;
528*38fd1498Szrj }
529*38fd1498Szrj
530*38fd1498Szrj return ref_chain;
531*38fd1498Szrj }
532*38fd1498Szrj
533*38fd1498Szrj /* Return true if INSN is
534*38fd1498Szrj (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
535*38fd1498Szrj and store x1 and x2 in REG_1 and REG_2. */
536*38fd1498Szrj
537*38fd1498Szrj static bool
is_cond_copy_insn(rtx_insn * insn,rtx * reg1,rtx * reg2)538*38fd1498Szrj is_cond_copy_insn (rtx_insn *insn, rtx *reg1, rtx *reg2)
539*38fd1498Szrj {
540*38fd1498Szrj rtx expr = single_set (insn);
541*38fd1498Szrj
542*38fd1498Szrj if (expr != NULL_RTX
543*38fd1498Szrj && GET_CODE (expr) == SET
544*38fd1498Szrj && GET_CODE (SET_DEST (expr)) == REG
545*38fd1498Szrj && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE
546*38fd1498Szrj && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
547*38fd1498Szrj && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
548*38fd1498Szrj {
549*38fd1498Szrj *reg1 = XEXP (SET_SRC (expr), 1);
550*38fd1498Szrj *reg2 = XEXP (SET_SRC (expr), 2);
551*38fd1498Szrj return true;
552*38fd1498Szrj }
553*38fd1498Szrj
554*38fd1498Szrj return false;
555*38fd1498Szrj }
556*38fd1498Szrj
/* How, if at all, the ree pass has already rewritten an insn.  */
enum ext_modified_kind
{
  /* Not touched by the ree pass so far.  */
  EXT_MODIFIED_NONE,
  /* Rewritten into a zero extension.  */
  EXT_MODIFIED_ZEXT,
  /* Rewritten into a sign extension.  */
  EXT_MODIFIED_SEXT
};
566*38fd1498Szrj
567*38fd1498Szrj struct ATTRIBUTE_PACKED ext_modified
568*38fd1498Szrj {
569*38fd1498Szrj /* Mode from which ree has zero or sign extended the destination. */
570*38fd1498Szrj ENUM_BITFIELD(machine_mode) mode : 8;
571*38fd1498Szrj
572*38fd1498Szrj /* Kind of modification of the insn. */
573*38fd1498Szrj ENUM_BITFIELD(ext_modified_kind) kind : 2;
574*38fd1498Szrj
575*38fd1498Szrj unsigned int do_not_reextend : 1;
576*38fd1498Szrj
577*38fd1498Szrj /* True if the insn is scheduled to be deleted. */
578*38fd1498Szrj unsigned int deleted : 1;
579*38fd1498Szrj };
580*38fd1498Szrj
581*38fd1498Szrj /* Vectors used by combine_reaching_defs and its helpers. */
582*38fd1498Szrj struct ext_state
583*38fd1498Szrj {
584*38fd1498Szrj /* In order to avoid constant alloc/free, we keep these
585*38fd1498Szrj 4 vectors live through the entire find_and_remove_re and just
586*38fd1498Szrj truncate them each time. */
587*38fd1498Szrj auto_vec<rtx_insn *> defs_list;
588*38fd1498Szrj auto_vec<rtx_insn *> copies_list;
589*38fd1498Szrj auto_vec<rtx_insn *> modified_list;
590*38fd1498Szrj auto_vec<rtx_insn *> work_list;
591*38fd1498Szrj
592*38fd1498Szrj /* For instructions that have been successfully modified, this is
593*38fd1498Szrj the original mode from which the insn is extending and
594*38fd1498Szrj kind of extension. */
595*38fd1498Szrj struct ext_modified *modified;
596*38fd1498Szrj };
597*38fd1498Szrj
598*38fd1498Szrj /* Reaching Definitions of the extended register could be conditional copies
599*38fd1498Szrj or regular definitions. This function separates the two types into two
600*38fd1498Szrj lists, STATE->DEFS_LIST and STATE->COPIES_LIST. This is necessary because,
601*38fd1498Szrj if a reaching definition is a conditional copy, merging the extension with
602*38fd1498Szrj this definition is wrong. Conditional copies are merged by transitively
603*38fd1498Szrj merging their definitions. The defs_list is populated with all the reaching
604*38fd1498Szrj definitions of the extension instruction (EXTEND_INSN) which must be merged
605*38fd1498Szrj with an extension. The copies_list contains all the conditional moves that
606*38fd1498Szrj will later be extended into a wider mode conditional move if all the merges
607*38fd1498Szrj are successful. The function returns false upon failure, true upon
608*38fd1498Szrj success. */
609*38fd1498Szrj
610*38fd1498Szrj static bool
make_defs_and_copies_lists(rtx_insn * extend_insn,const_rtx set_pat,ext_state * state)611*38fd1498Szrj make_defs_and_copies_lists (rtx_insn *extend_insn, const_rtx set_pat,
612*38fd1498Szrj ext_state *state)
613*38fd1498Szrj {
614*38fd1498Szrj rtx src_reg = XEXP (SET_SRC (set_pat), 0);
615*38fd1498Szrj bool *is_insn_visited;
616*38fd1498Szrj bool ret = true;
617*38fd1498Szrj
618*38fd1498Szrj state->work_list.truncate (0);
619*38fd1498Szrj
620*38fd1498Szrj /* Initialize the work list. */
621*38fd1498Szrj if (!get_defs (extend_insn, src_reg, &state->work_list))
622*38fd1498Szrj return false;
623*38fd1498Szrj
624*38fd1498Szrj is_insn_visited = XCNEWVEC (bool, max_insn_uid);
625*38fd1498Szrj
626*38fd1498Szrj /* Perform transitive closure for conditional copies. */
627*38fd1498Szrj while (!state->work_list.is_empty ())
628*38fd1498Szrj {
629*38fd1498Szrj rtx_insn *def_insn = state->work_list.pop ();
630*38fd1498Szrj rtx reg1, reg2;
631*38fd1498Szrj
632*38fd1498Szrj gcc_assert (INSN_UID (def_insn) < max_insn_uid);
633*38fd1498Szrj
634*38fd1498Szrj if (is_insn_visited[INSN_UID (def_insn)])
635*38fd1498Szrj continue;
636*38fd1498Szrj is_insn_visited[INSN_UID (def_insn)] = true;
637*38fd1498Szrj
638*38fd1498Szrj if (is_cond_copy_insn (def_insn, ®1, ®2))
639*38fd1498Szrj {
640*38fd1498Szrj /* Push it onto the copy list first. */
641*38fd1498Szrj state->copies_list.safe_push (def_insn);
642*38fd1498Szrj
643*38fd1498Szrj /* Now perform the transitive closure. */
644*38fd1498Szrj if (!get_defs (def_insn, reg1, &state->work_list)
645*38fd1498Szrj || !get_defs (def_insn, reg2, &state->work_list))
646*38fd1498Szrj {
647*38fd1498Szrj ret = false;
648*38fd1498Szrj break;
649*38fd1498Szrj }
650*38fd1498Szrj }
651*38fd1498Szrj else
652*38fd1498Szrj state->defs_list.safe_push (def_insn);
653*38fd1498Szrj }
654*38fd1498Szrj
655*38fd1498Szrj XDELETEVEC (is_insn_visited);
656*38fd1498Szrj
657*38fd1498Szrj return ret;
658*38fd1498Szrj }
659*38fd1498Szrj
660*38fd1498Szrj /* If DEF_INSN has single SET expression, possibly buried inside
661*38fd1498Szrj a PARALLEL, return the address of the SET expression, else
662*38fd1498Szrj return NULL. This is similar to single_set, except that
663*38fd1498Szrj single_set allows multiple SETs when all but one is dead. */
664*38fd1498Szrj static rtx *
get_sub_rtx(rtx_insn * def_insn)665*38fd1498Szrj get_sub_rtx (rtx_insn *def_insn)
666*38fd1498Szrj {
667*38fd1498Szrj enum rtx_code code = GET_CODE (PATTERN (def_insn));
668*38fd1498Szrj rtx *sub_rtx = NULL;
669*38fd1498Szrj
670*38fd1498Szrj if (code == PARALLEL)
671*38fd1498Szrj {
672*38fd1498Szrj for (int i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
673*38fd1498Szrj {
674*38fd1498Szrj rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
675*38fd1498Szrj if (GET_CODE (s_expr) != SET)
676*38fd1498Szrj continue;
677*38fd1498Szrj
678*38fd1498Szrj if (sub_rtx == NULL)
679*38fd1498Szrj sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
680*38fd1498Szrj else
681*38fd1498Szrj {
682*38fd1498Szrj /* PARALLEL with multiple SETs. */
683*38fd1498Szrj return NULL;
684*38fd1498Szrj }
685*38fd1498Szrj }
686*38fd1498Szrj }
687*38fd1498Szrj else if (code == SET)
688*38fd1498Szrj sub_rtx = &PATTERN (def_insn);
689*38fd1498Szrj else
690*38fd1498Szrj {
691*38fd1498Szrj /* It is not a PARALLEL or a SET, what could it be ? */
692*38fd1498Szrj return NULL;
693*38fd1498Szrj }
694*38fd1498Szrj
695*38fd1498Szrj gcc_assert (sub_rtx != NULL);
696*38fd1498Szrj return sub_rtx;
697*38fd1498Szrj }
698*38fd1498Szrj
699*38fd1498Szrj /* Merge the DEF_INSN with an extension. Calls combine_set_extension
700*38fd1498Szrj on the SET pattern. */
701*38fd1498Szrj
702*38fd1498Szrj static bool
merge_def_and_ext(ext_cand * cand,rtx_insn * def_insn,ext_state * state)703*38fd1498Szrj merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, ext_state *state)
704*38fd1498Szrj {
705*38fd1498Szrj machine_mode ext_src_mode;
706*38fd1498Szrj rtx *sub_rtx;
707*38fd1498Szrj
708*38fd1498Szrj ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
709*38fd1498Szrj sub_rtx = get_sub_rtx (def_insn);
710*38fd1498Szrj
711*38fd1498Szrj if (sub_rtx == NULL)
712*38fd1498Szrj return false;
713*38fd1498Szrj
714*38fd1498Szrj if (REG_P (SET_DEST (*sub_rtx))
715*38fd1498Szrj && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
716*38fd1498Szrj || ((state->modified[INSN_UID (def_insn)].kind
717*38fd1498Szrj == (cand->code == ZERO_EXTEND
718*38fd1498Szrj ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
719*38fd1498Szrj && state->modified[INSN_UID (def_insn)].mode
720*38fd1498Szrj == ext_src_mode)))
721*38fd1498Szrj {
722*38fd1498Szrj if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
723*38fd1498Szrj >= GET_MODE_UNIT_SIZE (cand->mode))
724*38fd1498Szrj return true;
725*38fd1498Szrj /* If def_insn is already scheduled to be deleted, don't attempt
726*38fd1498Szrj to modify it. */
727*38fd1498Szrj if (state->modified[INSN_UID (def_insn)].deleted)
728*38fd1498Szrj return false;
729*38fd1498Szrj if (combine_set_extension (cand, def_insn, sub_rtx))
730*38fd1498Szrj {
731*38fd1498Szrj if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
732*38fd1498Szrj state->modified[INSN_UID (def_insn)].mode = ext_src_mode;
733*38fd1498Szrj return true;
734*38fd1498Szrj }
735*38fd1498Szrj }
736*38fd1498Szrj
737*38fd1498Szrj return false;
738*38fd1498Szrj }
739*38fd1498Szrj
740*38fd1498Szrj /* Given SRC, which should be one or more extensions of a REG, strip
741*38fd1498Szrj away the extensions and return the REG. */
742*38fd1498Szrj
743*38fd1498Szrj static inline rtx
get_extended_src_reg(rtx src)744*38fd1498Szrj get_extended_src_reg (rtx src)
745*38fd1498Szrj {
746*38fd1498Szrj while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
747*38fd1498Szrj src = XEXP (src, 0);
748*38fd1498Szrj gcc_assert (REG_P (src));
749*38fd1498Szrj return src;
750*38fd1498Szrj }
751*38fd1498Szrj
752*38fd1498Szrj /* This function goes through all reaching defs of the source
753*38fd1498Szrj of the candidate for elimination (CAND) and tries to combine
754*38fd1498Szrj the extension with the definition instruction. The changes
755*38fd1498Szrj are made as a group so that even if one definition cannot be
756*38fd1498Szrj merged, all reaching definitions end up not being merged.
757*38fd1498Szrj When a conditional copy is encountered, merging is attempted
758*38fd1498Szrj transitively on its definitions. It returns true upon success
759*38fd1498Szrj and false upon failure. */
760*38fd1498Szrj
761*38fd1498Szrj static bool
combine_reaching_defs(ext_cand * cand,const_rtx set_pat,ext_state * state)762*38fd1498Szrj combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
763*38fd1498Szrj {
764*38fd1498Szrj rtx_insn *def_insn;
765*38fd1498Szrj bool merge_successful = true;
766*38fd1498Szrj int i;
767*38fd1498Szrj int defs_ix;
768*38fd1498Szrj bool outcome;
769*38fd1498Szrj
770*38fd1498Szrj state->defs_list.truncate (0);
771*38fd1498Szrj state->copies_list.truncate (0);
772*38fd1498Szrj
773*38fd1498Szrj outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
774*38fd1498Szrj
775*38fd1498Szrj if (!outcome)
776*38fd1498Szrj return false;
777*38fd1498Szrj
778*38fd1498Szrj /* If the destination operand of the extension is a different
779*38fd1498Szrj register than the source operand, then additional restrictions
780*38fd1498Szrj are needed. Note we have to handle cases where we have nested
781*38fd1498Szrj extensions in the source operand. */
782*38fd1498Szrj bool copy_needed
783*38fd1498Szrj = (REGNO (SET_DEST (PATTERN (cand->insn)))
784*38fd1498Szrj != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn)))));
785*38fd1498Szrj if (copy_needed)
786*38fd1498Szrj {
787*38fd1498Szrj /* Considering transformation of
788*38fd1498Szrj (set (reg1) (expression))
789*38fd1498Szrj ...
790*38fd1498Szrj (set (reg2) (any_extend (reg1)))
791*38fd1498Szrj
792*38fd1498Szrj into
793*38fd1498Szrj
794*38fd1498Szrj (set (reg2) (any_extend (expression)))
795*38fd1498Szrj (set (reg1) (reg2))
796*38fd1498Szrj ... */
797*38fd1498Szrj
798*38fd1498Szrj /* In theory we could handle more than one reaching def, it
799*38fd1498Szrj just makes the code to update the insn stream more complex. */
800*38fd1498Szrj if (state->defs_list.length () != 1)
801*38fd1498Szrj return false;
802*38fd1498Szrj
803*38fd1498Szrj /* We don't have the structure described above if there are
804*38fd1498Szrj conditional moves in between the def and the candidate,
805*38fd1498Szrj and we will not handle them correctly. See PR68194. */
806*38fd1498Szrj if (state->copies_list.length () > 0)
807*38fd1498Szrj return false;
808*38fd1498Szrj
809*38fd1498Szrj /* We require the candidate not already be modified. It may,
810*38fd1498Szrj for example have been changed from a (sign_extend (reg))
811*38fd1498Szrj into (zero_extend (sign_extend (reg))).
812*38fd1498Szrj
813*38fd1498Szrj Handling that case shouldn't be terribly difficult, but the code
814*38fd1498Szrj here and the code to emit copies would need auditing. Until
815*38fd1498Szrj we see a need, this is the safe thing to do. */
816*38fd1498Szrj if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
817*38fd1498Szrj return false;
818*38fd1498Szrj
819*38fd1498Szrj machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn)));
820*38fd1498Szrj rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn)));
821*38fd1498Szrj
822*38fd1498Szrj /* Ensure we can use the src_reg in dst_mode (needed for
823*38fd1498Szrj the (set (reg1) (reg2)) insn mentioned above). */
824*38fd1498Szrj if (!targetm.hard_regno_mode_ok (REGNO (src_reg), dst_mode))
825*38fd1498Szrj return false;
826*38fd1498Szrj
827*38fd1498Szrj /* Ensure the number of hard registers of the copy match. */
828*38fd1498Szrj if (hard_regno_nregs (REGNO (src_reg), dst_mode) != REG_NREGS (src_reg))
829*38fd1498Szrj return false;
830*38fd1498Szrj
831*38fd1498Szrj /* There's only one reaching def. */
832*38fd1498Szrj rtx_insn *def_insn = state->defs_list[0];
833*38fd1498Szrj
834*38fd1498Szrj /* The defining statement must not have been modified either. */
835*38fd1498Szrj if (state->modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE)
836*38fd1498Szrj return false;
837*38fd1498Szrj
838*38fd1498Szrj /* The defining statement and candidate insn must be in the same block.
839*38fd1498Szrj This is merely to keep the test for safety and updating the insn
840*38fd1498Szrj stream simple. Also ensure that within the block the candidate
841*38fd1498Szrj follows the defining insn. */
842*38fd1498Szrj basic_block bb = BLOCK_FOR_INSN (cand->insn);
843*38fd1498Szrj if (bb != BLOCK_FOR_INSN (def_insn)
844*38fd1498Szrj || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn))
845*38fd1498Szrj return false;
846*38fd1498Szrj
847*38fd1498Szrj /* If there is an overlap between the destination of DEF_INSN and
848*38fd1498Szrj CAND->insn, then this transformation is not safe. Note we have
849*38fd1498Szrj to test in the widened mode. */
850*38fd1498Szrj rtx *dest_sub_rtx = get_sub_rtx (def_insn);
851*38fd1498Szrj if (dest_sub_rtx == NULL
852*38fd1498Szrj || !REG_P (SET_DEST (*dest_sub_rtx)))
853*38fd1498Szrj return false;
854*38fd1498Szrj
855*38fd1498Szrj rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand->insn))),
856*38fd1498Szrj REGNO (SET_DEST (*dest_sub_rtx)));
857*38fd1498Szrj if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand->insn))))
858*38fd1498Szrj return false;
859*38fd1498Szrj
860*38fd1498Szrj /* On RISC machines we must make sure that changing the mode of SRC_REG
861*38fd1498Szrj as destination register will not affect its reaching uses, which may
862*38fd1498Szrj read its value in a larger mode because DEF_INSN implicitly sets it
863*38fd1498Szrj in word mode. */
864*38fd1498Szrj poly_int64 prec
865*38fd1498Szrj = GET_MODE_PRECISION (GET_MODE (SET_DEST (*dest_sub_rtx)));
866*38fd1498Szrj if (WORD_REGISTER_OPERATIONS && known_lt (prec, BITS_PER_WORD))
867*38fd1498Szrj {
868*38fd1498Szrj struct df_link *uses = get_uses (def_insn, src_reg);
869*38fd1498Szrj if (!uses)
870*38fd1498Szrj return false;
871*38fd1498Szrj
872*38fd1498Szrj for (df_link *use = uses; use; use = use->next)
873*38fd1498Szrj if (paradoxical_subreg_p (GET_MODE (*DF_REF_LOC (use->ref)),
874*38fd1498Szrj GET_MODE (SET_DEST (*dest_sub_rtx))))
875*38fd1498Szrj return false;
876*38fd1498Szrj }
877*38fd1498Szrj
878*38fd1498Szrj /* The destination register of the extension insn must not be
879*38fd1498Szrj used or set between the def_insn and cand->insn exclusive. */
880*38fd1498Szrj if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)),
881*38fd1498Szrj def_insn, cand->insn)
882*38fd1498Szrj || reg_set_between_p (SET_DEST (PATTERN (cand->insn)),
883*38fd1498Szrj def_insn, cand->insn))
884*38fd1498Szrj return false;
885*38fd1498Szrj
886*38fd1498Szrj /* We must be able to copy between the two registers. Generate,
887*38fd1498Szrj recognize and verify constraints of the copy. Also fail if this
888*38fd1498Szrj generated more than one insn.
889*38fd1498Szrj
890*38fd1498Szrj This generates garbage since we throw away the insn when we're
891*38fd1498Szrj done, only to recreate it later if this test was successful.
892*38fd1498Szrj
893*38fd1498Szrj Make sure to get the mode from the extension (cand->insn). This
894*38fd1498Szrj is different than in the code to emit the copy as we have not
895*38fd1498Szrj modified the defining insn yet. */
896*38fd1498Szrj start_sequence ();
897*38fd1498Szrj rtx pat = PATTERN (cand->insn);
898*38fd1498Szrj rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
899*38fd1498Szrj REGNO (get_extended_src_reg (SET_SRC (pat))));
900*38fd1498Szrj rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
901*38fd1498Szrj REGNO (SET_DEST (pat)));
902*38fd1498Szrj emit_move_insn (new_dst, new_src);
903*38fd1498Szrj
904*38fd1498Szrj rtx_insn *insn = get_insns ();
905*38fd1498Szrj end_sequence ();
906*38fd1498Szrj if (NEXT_INSN (insn))
907*38fd1498Szrj return false;
908*38fd1498Szrj if (recog_memoized (insn) == -1)
909*38fd1498Szrj return false;
910*38fd1498Szrj extract_insn (insn);
911*38fd1498Szrj if (!constrain_operands (1, get_preferred_alternatives (insn, bb)))
912*38fd1498Szrj return false;
913*38fd1498Szrj
914*38fd1498Szrj while (REG_P (SET_SRC (*dest_sub_rtx))
915*38fd1498Szrj && (REGNO (SET_SRC (*dest_sub_rtx)) == REGNO (SET_DEST (pat))))
916*38fd1498Szrj {
917*38fd1498Szrj /* Considering transformation of
918*38fd1498Szrj (set (reg2) (expression))
919*38fd1498Szrj ...
920*38fd1498Szrj (set (reg1) (reg2))
921*38fd1498Szrj ...
922*38fd1498Szrj (set (reg2) (any_extend (reg1)))
923*38fd1498Szrj
924*38fd1498Szrj into
925*38fd1498Szrj
926*38fd1498Szrj (set (reg2) (any_extend (expression)))
927*38fd1498Szrj (set (reg1) (reg2))
928*38fd1498Szrj ... */
929*38fd1498Szrj struct df_link *defs
930*38fd1498Szrj = get_defs (def_insn, SET_SRC (*dest_sub_rtx), NULL);
931*38fd1498Szrj if (defs == NULL || defs->next)
932*38fd1498Szrj break;
933*38fd1498Szrj
934*38fd1498Szrj /* There is only one reaching def. */
935*38fd1498Szrj rtx_insn *def_insn2 = DF_REF_INSN (defs->ref);
936*38fd1498Szrj
937*38fd1498Szrj /* The defining statement must not have been modified either. */
938*38fd1498Szrj if (state->modified[INSN_UID (def_insn2)].kind != EXT_MODIFIED_NONE)
939*38fd1498Szrj break;
940*38fd1498Szrj
941*38fd1498Szrj /* The def_insn2 and candidate insn must be in the same
942*38fd1498Szrj block and def_insn follows def_insn2. */
943*38fd1498Szrj if (bb != BLOCK_FOR_INSN (def_insn2)
944*38fd1498Szrj || DF_INSN_LUID (def_insn2) > DF_INSN_LUID (def_insn))
945*38fd1498Szrj break;
946*38fd1498Szrj
947*38fd1498Szrj rtx *dest_sub_rtx2 = get_sub_rtx (def_insn2);
948*38fd1498Szrj if (dest_sub_rtx2 == NULL
949*38fd1498Szrj || !REG_P (SET_DEST (*dest_sub_rtx2)))
950*38fd1498Szrj break;
951*38fd1498Szrj
952*38fd1498Szrj /* On RISC machines we must make sure that changing the mode of
953*38fd1498Szrj SRC_REG as destination register will not affect its reaching
954*38fd1498Szrj uses, which may read its value in a larger mode because DEF_INSN
955*38fd1498Szrj implicitly sets it in word mode. */
956*38fd1498Szrj if (WORD_REGISTER_OPERATIONS && known_lt (prec, BITS_PER_WORD))
957*38fd1498Szrj {
958*38fd1498Szrj struct df_link *uses = get_uses (def_insn2, SET_DEST (pat));
959*38fd1498Szrj if (!uses)
960*38fd1498Szrj break;
961*38fd1498Szrj
962*38fd1498Szrj df_link *use;
963*38fd1498Szrj rtx dest2 = SET_DEST (*dest_sub_rtx2);
964*38fd1498Szrj for (use = uses; use; use = use->next)
965*38fd1498Szrj if (paradoxical_subreg_p (GET_MODE (*DF_REF_LOC (use->ref)),
966*38fd1498Szrj GET_MODE (dest2)))
967*38fd1498Szrj break;
968*38fd1498Szrj if (use)
969*38fd1498Szrj break;
970*38fd1498Szrj }
971*38fd1498Szrj
972*38fd1498Szrj /* The destination register of the extension insn must not be
973*38fd1498Szrj used or set between the def_insn2 and def_insn exclusive.
974*38fd1498Szrj Likewise for the other reg, i.e. check both reg1 and reg2
975*38fd1498Szrj in the above comment. */
976*38fd1498Szrj if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)),
977*38fd1498Szrj def_insn2, def_insn)
978*38fd1498Szrj || reg_set_between_p (SET_DEST (PATTERN (cand->insn)),
979*38fd1498Szrj def_insn2, def_insn)
980*38fd1498Szrj || reg_used_between_p (src_reg, def_insn2, def_insn)
981*38fd1498Szrj || reg_set_between_p (src_reg, def_insn2, def_insn))
982*38fd1498Szrj break;
983*38fd1498Szrj
984*38fd1498Szrj state->defs_list[0] = def_insn2;
985*38fd1498Szrj break;
986*38fd1498Szrj }
987*38fd1498Szrj }
988*38fd1498Szrj
989*38fd1498Szrj /* If cand->insn has been already modified, update cand->mode to a wider
990*38fd1498Szrj mode if possible, or punt. */
991*38fd1498Szrj if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
992*38fd1498Szrj {
993*38fd1498Szrj machine_mode mode;
994*38fd1498Szrj rtx set;
995*38fd1498Szrj
996*38fd1498Szrj if (state->modified[INSN_UID (cand->insn)].kind
997*38fd1498Szrj != (cand->code == ZERO_EXTEND
998*38fd1498Szrj ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)
999*38fd1498Szrj || state->modified[INSN_UID (cand->insn)].mode != cand->mode
1000*38fd1498Szrj || (set = single_set (cand->insn)) == NULL_RTX)
1001*38fd1498Szrj return false;
1002*38fd1498Szrj mode = GET_MODE (SET_DEST (set));
1003*38fd1498Szrj gcc_assert (GET_MODE_UNIT_SIZE (mode)
1004*38fd1498Szrj >= GET_MODE_UNIT_SIZE (cand->mode));
1005*38fd1498Szrj cand->mode = mode;
1006*38fd1498Szrj }
1007*38fd1498Szrj
1008*38fd1498Szrj merge_successful = true;
1009*38fd1498Szrj
1010*38fd1498Szrj /* Go through the defs vector and try to merge all the definitions
1011*38fd1498Szrj in this vector. */
1012*38fd1498Szrj state->modified_list.truncate (0);
1013*38fd1498Szrj FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn)
1014*38fd1498Szrj {
1015*38fd1498Szrj if (merge_def_and_ext (cand, def_insn, state))
1016*38fd1498Szrj state->modified_list.safe_push (def_insn);
1017*38fd1498Szrj else
1018*38fd1498Szrj {
1019*38fd1498Szrj merge_successful = false;
1020*38fd1498Szrj break;
1021*38fd1498Szrj }
1022*38fd1498Szrj }
1023*38fd1498Szrj
1024*38fd1498Szrj /* Now go through the conditional copies vector and try to merge all
1025*38fd1498Szrj the copies in this vector. */
1026*38fd1498Szrj if (merge_successful)
1027*38fd1498Szrj {
1028*38fd1498Szrj FOR_EACH_VEC_ELT (state->copies_list, i, def_insn)
1029*38fd1498Szrj {
1030*38fd1498Szrj if (transform_ifelse (cand, def_insn))
1031*38fd1498Szrj state->modified_list.safe_push (def_insn);
1032*38fd1498Szrj else
1033*38fd1498Szrj {
1034*38fd1498Szrj merge_successful = false;
1035*38fd1498Szrj break;
1036*38fd1498Szrj }
1037*38fd1498Szrj }
1038*38fd1498Szrj }
1039*38fd1498Szrj
1040*38fd1498Szrj if (merge_successful)
1041*38fd1498Szrj {
1042*38fd1498Szrj /* Commit the changes here if possible
1043*38fd1498Szrj FIXME: It's an all-or-nothing scenario. Even if only one definition
1044*38fd1498Szrj cannot be merged, we entirely give up. In the future, we should allow
1045*38fd1498Szrj extensions to be partially eliminated along those paths where the
1046*38fd1498Szrj definitions could be merged. */
1047*38fd1498Szrj if (apply_change_group ())
1048*38fd1498Szrj {
1049*38fd1498Szrj if (dump_file)
1050*38fd1498Szrj fprintf (dump_file, "All merges were successful.\n");
1051*38fd1498Szrj
1052*38fd1498Szrj FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
1053*38fd1498Szrj {
1054*38fd1498Szrj ext_modified *modified = &state->modified[INSN_UID (def_insn)];
1055*38fd1498Szrj if (modified->kind == EXT_MODIFIED_NONE)
1056*38fd1498Szrj modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
1057*38fd1498Szrj : EXT_MODIFIED_SEXT);
1058*38fd1498Szrj
1059*38fd1498Szrj if (copy_needed)
1060*38fd1498Szrj modified->do_not_reextend = 1;
1061*38fd1498Szrj }
1062*38fd1498Szrj return true;
1063*38fd1498Szrj }
1064*38fd1498Szrj else
1065*38fd1498Szrj {
1066*38fd1498Szrj /* Changes need not be cancelled explicitly as apply_change_group
1067*38fd1498Szrj does it. Print list of definitions in the dump_file for debug
1068*38fd1498Szrj purposes. This extension cannot be deleted. */
1069*38fd1498Szrj if (dump_file)
1070*38fd1498Szrj {
1071*38fd1498Szrj fprintf (dump_file,
1072*38fd1498Szrj "Merge cancelled, non-mergeable definitions:\n");
1073*38fd1498Szrj FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
1074*38fd1498Szrj print_rtl_single (dump_file, def_insn);
1075*38fd1498Szrj }
1076*38fd1498Szrj }
1077*38fd1498Szrj }
1078*38fd1498Szrj else
1079*38fd1498Szrj {
1080*38fd1498Szrj /* Cancel any changes that have been made so far. */
1081*38fd1498Szrj cancel_changes (0);
1082*38fd1498Szrj }
1083*38fd1498Szrj
1084*38fd1498Szrj return false;
1085*38fd1498Szrj }
1086*38fd1498Szrj
1087*38fd1498Szrj /* Add an extension pattern that could be eliminated. */
1088*38fd1498Szrj
1089*38fd1498Szrj static void
add_removable_extension(const_rtx expr,rtx_insn * insn,vec<ext_cand> * insn_list,unsigned * def_map,bitmap init_regs)1090*38fd1498Szrj add_removable_extension (const_rtx expr, rtx_insn *insn,
1091*38fd1498Szrj vec<ext_cand> *insn_list,
1092*38fd1498Szrj unsigned *def_map,
1093*38fd1498Szrj bitmap init_regs)
1094*38fd1498Szrj {
1095*38fd1498Szrj enum rtx_code code;
1096*38fd1498Szrj machine_mode mode;
1097*38fd1498Szrj unsigned int idx;
1098*38fd1498Szrj rtx src, dest;
1099*38fd1498Szrj
1100*38fd1498Szrj /* We are looking for SET (REG N) (ANY_EXTEND (REG N)). */
1101*38fd1498Szrj if (GET_CODE (expr) != SET)
1102*38fd1498Szrj return;
1103*38fd1498Szrj
1104*38fd1498Szrj src = SET_SRC (expr);
1105*38fd1498Szrj code = GET_CODE (src);
1106*38fd1498Szrj dest = SET_DEST (expr);
1107*38fd1498Szrj mode = GET_MODE (dest);
1108*38fd1498Szrj
1109*38fd1498Szrj if (REG_P (dest)
1110*38fd1498Szrj && (code == SIGN_EXTEND || code == ZERO_EXTEND)
1111*38fd1498Szrj && REG_P (XEXP (src, 0)))
1112*38fd1498Szrj {
1113*38fd1498Szrj rtx reg = XEXP (src, 0);
1114*38fd1498Szrj struct df_link *defs, *def;
1115*38fd1498Szrj ext_cand *cand;
1116*38fd1498Szrj
1117*38fd1498Szrj /* Zero-extension of an undefined value is partly defined (it's
1118*38fd1498Szrj completely undefined for sign-extension, though). So if there exists
1119*38fd1498Szrj a path from the entry to this zero-extension that leaves this register
1120*38fd1498Szrj uninitialized, removing the extension could change the behavior of
1121*38fd1498Szrj correct programs. So first, check it is not the case. */
1122*38fd1498Szrj if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
1123*38fd1498Szrj {
1124*38fd1498Szrj if (dump_file)
1125*38fd1498Szrj {
1126*38fd1498Szrj fprintf (dump_file, "Cannot eliminate extension:\n");
1127*38fd1498Szrj print_rtl_single (dump_file, insn);
1128*38fd1498Szrj fprintf (dump_file, " because it can operate on uninitialized"
1129*38fd1498Szrj " data\n");
1130*38fd1498Szrj }
1131*38fd1498Szrj return;
1132*38fd1498Szrj }
1133*38fd1498Szrj
1134*38fd1498Szrj /* Second, make sure we can get all the reaching definitions. */
1135*38fd1498Szrj defs = get_defs (insn, reg, NULL);
1136*38fd1498Szrj if (!defs)
1137*38fd1498Szrj {
1138*38fd1498Szrj if (dump_file)
1139*38fd1498Szrj {
1140*38fd1498Szrj fprintf (dump_file, "Cannot eliminate extension:\n");
1141*38fd1498Szrj print_rtl_single (dump_file, insn);
1142*38fd1498Szrj fprintf (dump_file, " because of missing definition(s)\n");
1143*38fd1498Szrj }
1144*38fd1498Szrj return;
1145*38fd1498Szrj }
1146*38fd1498Szrj
1147*38fd1498Szrj /* Third, make sure the reaching definitions don't feed another and
1148*38fd1498Szrj different extension. FIXME: this obviously can be improved. */
1149*38fd1498Szrj for (def = defs; def; def = def->next)
1150*38fd1498Szrj if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))])
1151*38fd1498Szrj && idx != -1U
1152*38fd1498Szrj && (cand = &(*insn_list)[idx - 1])
1153*38fd1498Szrj && cand->code != code)
1154*38fd1498Szrj {
1155*38fd1498Szrj if (dump_file)
1156*38fd1498Szrj {
1157*38fd1498Szrj fprintf (dump_file, "Cannot eliminate extension:\n");
1158*38fd1498Szrj print_rtl_single (dump_file, insn);
1159*38fd1498Szrj fprintf (dump_file, " because of other extension\n");
1160*38fd1498Szrj }
1161*38fd1498Szrj return;
1162*38fd1498Szrj }
1163*38fd1498Szrj /* For vector mode extensions, ensure that all uses of the
1164*38fd1498Szrj XEXP (src, 0) register are in insn or debug insns, as unlike
1165*38fd1498Szrj integral extensions lowpart subreg of the sign/zero extended
1166*38fd1498Szrj register are not equal to the original register, so we have
1167*38fd1498Szrj to change all uses or none and the current code isn't able
1168*38fd1498Szrj to change them all at once in one transaction. */
1169*38fd1498Szrj else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0))))
1170*38fd1498Szrj {
1171*38fd1498Szrj if (idx == 0)
1172*38fd1498Szrj {
1173*38fd1498Szrj struct df_link *ref_chain, *ref_link;
1174*38fd1498Szrj
1175*38fd1498Szrj ref_chain = DF_REF_CHAIN (def->ref);
1176*38fd1498Szrj for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
1177*38fd1498Szrj {
1178*38fd1498Szrj if (ref_link->ref == NULL
1179*38fd1498Szrj || DF_REF_INSN_INFO (ref_link->ref) == NULL)
1180*38fd1498Szrj {
1181*38fd1498Szrj idx = -1U;
1182*38fd1498Szrj break;
1183*38fd1498Szrj }
1184*38fd1498Szrj rtx_insn *use_insn = DF_REF_INSN (ref_link->ref);
1185*38fd1498Szrj if (use_insn != insn && !DEBUG_INSN_P (use_insn))
1186*38fd1498Szrj {
1187*38fd1498Szrj idx = -1U;
1188*38fd1498Szrj break;
1189*38fd1498Szrj }
1190*38fd1498Szrj }
1191*38fd1498Szrj if (idx == -1U)
1192*38fd1498Szrj def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
1193*38fd1498Szrj }
1194*38fd1498Szrj if (idx == -1U)
1195*38fd1498Szrj {
1196*38fd1498Szrj if (dump_file)
1197*38fd1498Szrj {
1198*38fd1498Szrj fprintf (dump_file, "Cannot eliminate extension:\n");
1199*38fd1498Szrj print_rtl_single (dump_file, insn);
1200*38fd1498Szrj fprintf (dump_file,
1201*38fd1498Szrj " because some vector uses aren't extension\n");
1202*38fd1498Szrj }
1203*38fd1498Szrj return;
1204*38fd1498Szrj }
1205*38fd1498Szrj }
1206*38fd1498Szrj
1207*38fd1498Szrj /* Fourth, if the extended version occupies more registers than the
1208*38fd1498Szrj original and the source of the extension is the same hard register
1209*38fd1498Szrj as the destination of the extension, then we can not eliminate
1210*38fd1498Szrj the extension without deep analysis, so just punt.
1211*38fd1498Szrj
1212*38fd1498Szrj We allow this when the registers are different because the
1213*38fd1498Szrj code in combine_reaching_defs will handle that case correctly. */
1214*38fd1498Szrj if (hard_regno_nregs (REGNO (dest), mode) != REG_NREGS (reg)
1215*38fd1498Szrj && reg_overlap_mentioned_p (dest, reg))
1216*38fd1498Szrj return;
1217*38fd1498Szrj
1218*38fd1498Szrj /* Then add the candidate to the list and insert the reaching definitions
1219*38fd1498Szrj into the definition map. */
1220*38fd1498Szrj ext_cand e = {expr, code, mode, insn};
1221*38fd1498Szrj insn_list->safe_push (e);
1222*38fd1498Szrj idx = insn_list->length ();
1223*38fd1498Szrj
1224*38fd1498Szrj for (def = defs; def; def = def->next)
1225*38fd1498Szrj def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
1226*38fd1498Szrj }
1227*38fd1498Szrj }
1228*38fd1498Szrj
1229*38fd1498Szrj /* Traverse the instruction stream looking for extensions and return the
1230*38fd1498Szrj list of candidates. */
1231*38fd1498Szrj
1232*38fd1498Szrj static vec<ext_cand>
find_removable_extensions(void)1233*38fd1498Szrj find_removable_extensions (void)
1234*38fd1498Szrj {
1235*38fd1498Szrj vec<ext_cand> insn_list = vNULL;
1236*38fd1498Szrj basic_block bb;
1237*38fd1498Szrj rtx_insn *insn;
1238*38fd1498Szrj rtx set;
1239*38fd1498Szrj unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid);
1240*38fd1498Szrj bitmap_head init, kill, gen, tmp;
1241*38fd1498Szrj
1242*38fd1498Szrj bitmap_initialize (&init, NULL);
1243*38fd1498Szrj bitmap_initialize (&kill, NULL);
1244*38fd1498Szrj bitmap_initialize (&gen, NULL);
1245*38fd1498Szrj bitmap_initialize (&tmp, NULL);
1246*38fd1498Szrj
1247*38fd1498Szrj FOR_EACH_BB_FN (bb, cfun)
1248*38fd1498Szrj {
1249*38fd1498Szrj bitmap_copy (&init, DF_MIR_IN (bb));
1250*38fd1498Szrj bitmap_clear (&kill);
1251*38fd1498Szrj bitmap_clear (&gen);
1252*38fd1498Szrj
1253*38fd1498Szrj FOR_BB_INSNS (bb, insn)
1254*38fd1498Szrj {
1255*38fd1498Szrj if (NONDEBUG_INSN_P (insn))
1256*38fd1498Szrj {
1257*38fd1498Szrj set = single_set (insn);
1258*38fd1498Szrj if (set != NULL_RTX)
1259*38fd1498Szrj add_removable_extension (set, insn, &insn_list, def_map,
1260*38fd1498Szrj &init);
1261*38fd1498Szrj df_mir_simulate_one_insn (bb, insn, &kill, &gen);
1262*38fd1498Szrj bitmap_ior_and_compl (&tmp, &gen, &init, &kill);
1263*38fd1498Szrj bitmap_copy (&init, &tmp);
1264*38fd1498Szrj }
1265*38fd1498Szrj }
1266*38fd1498Szrj }
1267*38fd1498Szrj
1268*38fd1498Szrj XDELETEVEC (def_map);
1269*38fd1498Szrj
1270*38fd1498Szrj return insn_list;
1271*38fd1498Szrj }
1272*38fd1498Szrj
1273*38fd1498Szrj /* This is the main function that checks the insn stream for redundant
1274*38fd1498Szrj extensions and tries to remove them if possible. */
1275*38fd1498Szrj
1276*38fd1498Szrj static void
find_and_remove_re(void)1277*38fd1498Szrj find_and_remove_re (void)
1278*38fd1498Szrj {
1279*38fd1498Szrj ext_cand *curr_cand;
1280*38fd1498Szrj rtx_insn *curr_insn = NULL;
1281*38fd1498Szrj int num_re_opportunities = 0, num_realized = 0, i;
1282*38fd1498Szrj vec<ext_cand> reinsn_list;
1283*38fd1498Szrj auto_vec<rtx_insn *> reinsn_del_list;
1284*38fd1498Szrj auto_vec<rtx_insn *> reinsn_copy_list;
1285*38fd1498Szrj
1286*38fd1498Szrj /* Construct DU chain to get all reaching definitions of each
1287*38fd1498Szrj extension instruction. */
1288*38fd1498Szrj df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
1289*38fd1498Szrj df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
1290*38fd1498Szrj df_mir_add_problem ();
1291*38fd1498Szrj df_analyze ();
1292*38fd1498Szrj df_set_flags (DF_DEFER_INSN_RESCAN);
1293*38fd1498Szrj
1294*38fd1498Szrj max_insn_uid = get_max_uid ();
1295*38fd1498Szrj reinsn_list = find_removable_extensions ();
1296*38fd1498Szrj
1297*38fd1498Szrj ext_state state;
1298*38fd1498Szrj if (reinsn_list.is_empty ())
1299*38fd1498Szrj state.modified = NULL;
1300*38fd1498Szrj else
1301*38fd1498Szrj state.modified = XCNEWVEC (struct ext_modified, max_insn_uid);
1302*38fd1498Szrj
1303*38fd1498Szrj FOR_EACH_VEC_ELT (reinsn_list, i, curr_cand)
1304*38fd1498Szrj {
1305*38fd1498Szrj num_re_opportunities++;
1306*38fd1498Szrj
1307*38fd1498Szrj /* Try to combine the extension with the definition. */
1308*38fd1498Szrj if (dump_file)
1309*38fd1498Szrj {
1310*38fd1498Szrj fprintf (dump_file, "Trying to eliminate extension:\n");
1311*38fd1498Szrj print_rtl_single (dump_file, curr_cand->insn);
1312*38fd1498Szrj }
1313*38fd1498Szrj
1314*38fd1498Szrj if (combine_reaching_defs (curr_cand, curr_cand->expr, &state))
1315*38fd1498Szrj {
1316*38fd1498Szrj if (dump_file)
1317*38fd1498Szrj fprintf (dump_file, "Eliminated the extension.\n");
1318*38fd1498Szrj num_realized++;
1319*38fd1498Szrj /* If the RHS of the current candidate is not (extend (reg)), then
1320*38fd1498Szrj we do not allow the optimization of extensions where
1321*38fd1498Szrj the source and destination registers do not match. Thus
1322*38fd1498Szrj checking REG_P here is correct. */
1323*38fd1498Szrj if (REG_P (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))
1324*38fd1498Szrj && (REGNO (SET_DEST (PATTERN (curr_cand->insn)))
1325*38fd1498Szrj != REGNO (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))))
1326*38fd1498Szrj {
1327*38fd1498Szrj reinsn_copy_list.safe_push (curr_cand->insn);
1328*38fd1498Szrj reinsn_copy_list.safe_push (state.defs_list[0]);
1329*38fd1498Szrj }
1330*38fd1498Szrj reinsn_del_list.safe_push (curr_cand->insn);
1331*38fd1498Szrj state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
1332*38fd1498Szrj }
1333*38fd1498Szrj }
1334*38fd1498Szrj
1335*38fd1498Szrj /* The copy list contains pairs of insns which describe copies we
1336*38fd1498Szrj need to insert into the INSN stream.
1337*38fd1498Szrj
1338*38fd1498Szrj The first insn in each pair is the extension insn, from which
1339*38fd1498Szrj we derive the source and destination of the copy.
1340*38fd1498Szrj
1341*38fd1498Szrj The second insn in each pair is the memory reference where the
1342*38fd1498Szrj extension will ultimately happen. We emit the new copy
1343*38fd1498Szrj immediately after this insn.
1344*38fd1498Szrj
1345*38fd1498Szrj It may first appear that the arguments for the copy are reversed.
1346*38fd1498Szrj Remember that the memory reference will be changed to refer to the
1347*38fd1498Szrj destination of the extention. So we're actually emitting a copy
1348*38fd1498Szrj from the new destination to the old destination. */
1349*38fd1498Szrj for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
1350*38fd1498Szrj {
1351*38fd1498Szrj rtx_insn *curr_insn = reinsn_copy_list[i];
1352*38fd1498Szrj rtx_insn *def_insn = reinsn_copy_list[i + 1];
1353*38fd1498Szrj
1354*38fd1498Szrj /* Use the mode of the destination of the defining insn
1355*38fd1498Szrj for the mode of the copy. This is necessary if the
1356*38fd1498Szrj defining insn was used to eliminate a second extension
1357*38fd1498Szrj that was wider than the first. */
1358*38fd1498Szrj rtx sub_rtx = *get_sub_rtx (def_insn);
1359*38fd1498Szrj rtx pat = PATTERN (curr_insn);
1360*38fd1498Szrj rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
1361*38fd1498Szrj REGNO (XEXP (SET_SRC (pat), 0)));
1362*38fd1498Szrj rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
1363*38fd1498Szrj REGNO (SET_DEST (pat)));
1364*38fd1498Szrj rtx set = gen_rtx_SET (new_dst, new_src);
1365*38fd1498Szrj emit_insn_after (set, def_insn);
1366*38fd1498Szrj }
1367*38fd1498Szrj
1368*38fd1498Szrj /* Delete all useless extensions here in one sweep. */
1369*38fd1498Szrj FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
1370*38fd1498Szrj delete_insn (curr_insn);
1371*38fd1498Szrj
1372*38fd1498Szrj reinsn_list.release ();
1373*38fd1498Szrj XDELETEVEC (state.modified);
1374*38fd1498Szrj
1375*38fd1498Szrj if (dump_file && num_re_opportunities > 0)
1376*38fd1498Szrj fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
1377*38fd1498Szrj num_re_opportunities, num_realized);
1378*38fd1498Szrj }
1379*38fd1498Szrj
1380*38fd1498Szrj /* Find and remove redundant extensions. */
1381*38fd1498Szrj
1382*38fd1498Szrj static unsigned int
rest_of_handle_ree(void)1383*38fd1498Szrj rest_of_handle_ree (void)
1384*38fd1498Szrj {
1385*38fd1498Szrj find_and_remove_re ();
1386*38fd1498Szrj return 0;
1387*38fd1498Szrj }
1388*38fd1498Szrj
1389*38fd1498Szrj namespace {
1390*38fd1498Szrj
1391*38fd1498Szrj const pass_data pass_data_ree =
1392*38fd1498Szrj {
1393*38fd1498Szrj RTL_PASS, /* type */
1394*38fd1498Szrj "ree", /* name */
1395*38fd1498Szrj OPTGROUP_NONE, /* optinfo_flags */
1396*38fd1498Szrj TV_REE, /* tv_id */
1397*38fd1498Szrj 0, /* properties_required */
1398*38fd1498Szrj 0, /* properties_provided */
1399*38fd1498Szrj 0, /* properties_destroyed */
1400*38fd1498Szrj 0, /* todo_flags_start */
1401*38fd1498Szrj TODO_df_finish, /* todo_flags_finish */
1402*38fd1498Szrj };
1403*38fd1498Szrj
1404*38fd1498Szrj class pass_ree : public rtl_opt_pass
1405*38fd1498Szrj {
1406*38fd1498Szrj public:
pass_ree(gcc::context * ctxt)1407*38fd1498Szrj pass_ree (gcc::context *ctxt)
1408*38fd1498Szrj : rtl_opt_pass (pass_data_ree, ctxt)
1409*38fd1498Szrj {}
1410*38fd1498Szrj
1411*38fd1498Szrj /* opt_pass methods: */
gate(function *)1412*38fd1498Szrj virtual bool gate (function *) { return (optimize > 0 && flag_ree); }
execute(function *)1413*38fd1498Szrj virtual unsigned int execute (function *) { return rest_of_handle_ree (); }
1414*38fd1498Szrj
1415*38fd1498Szrj }; // class pass_ree
1416*38fd1498Szrj
1417*38fd1498Szrj } // anon namespace
1418*38fd1498Szrj
1419*38fd1498Szrj rtl_opt_pass *
make_pass_ree(gcc::context * ctxt)1420*38fd1498Szrj make_pass_ree (gcc::context *ctxt)
1421*38fd1498Szrj {
1422*38fd1498Szrj return new pass_ree (ctxt);
1423*38fd1498Szrj }
1424