xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/ree.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Redundant Extension Elimination pass for the GNU compiler.
2    Copyright (C) 2010-2013 Free Software Foundation, Inc.
3    Contributed by Ilya Enkovich (ilya.enkovich@intel.com)
4 
5    Based on the Redundant Zero-extension elimination pass contributed by
6    Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com).
7 
8 This file is part of GCC.
9 
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14 
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18 for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3.  If not see
22 <http://www.gnu.org/licenses/>.  */
23 
24 
25 /* Problem Description :
26    --------------------
27    This pass is intended to remove redundant extension instructions.
28    Such instructions appear for different reasons.  We expect some of
29    them due to implicit zero-extension in 64-bit registers after writing
30    to their lower 32-bit half (e.g. for the x86-64 architecture).
31    Another possible reason is a type cast which follows a load (for
32    instance a register restore) and which can be combined into a single
33    instruction, and for which earlier local passes, e.g. the combiner,
34    weren't able to optimize.
35 
36    How does this pass work  ?
37    --------------------------
38 
39    This pass is run after register allocation.  Hence, all registers that
40    this pass deals with are hard registers.  This pass first looks for an
41    extension instruction that could possibly be redundant.  Such extension
42    instructions show up in RTL with the pattern  :
43    (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))),
44    where x can be any hard register.
45    Now, this pass tries to eliminate this instruction by merging the
46    extension with the definitions of register x.  For instance, if
47    one of the definitions of register x was  :
48    (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
49    followed by extension  :
50    (set (reg:DI x) (zero_extend:DI (reg:SI x)))
51    then the combination converts this into :
52    (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
53    If all the merged definitions are recognizable assembly instructions,
54    the extension is effectively eliminated.
55 
56    For example, for the x86-64 architecture, implicit zero-extensions
57    are captured with appropriate patterns in the i386.md file.  Hence,
58    these merged definition can be matched to a single assembly instruction.
59    The original extension instruction is then deleted if all the
60    definitions can be merged.
61 
62    However, there are cases where the definition instruction cannot be
63    merged with an extension.  Examples are CALL instructions.  In such
64    cases, the original extension is not redundant and this pass does
65    not delete it.
66 
67    Handling conditional moves :
68    ----------------------------
69 
70    Architectures like x86-64 support conditional moves whose semantics for
71    extension differ from the other instructions.  For instance, the
72    instruction *cmov ebx, eax*
73    zero-extends eax onto rax only when the move from ebx to eax happens.
74    Otherwise, eax may not be zero-extended.  Consider conditional moves as
75    RTL instructions of the form
76    (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
77    This pass tries to merge an extension with a conditional move by
78    actually merging the definitions of y and z with an extension and then
79    converting the conditional move into :
80    (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
81    Since registers y and z are extended, register x will also be extended
82    after the conditional move.  Note that this step has to be done
83    transitively since the definition of a conditional copy can be
84    another conditional copy.
85 
86    Motivating Example I :
87    ---------------------
88    For this program :
89    **********************************************
90    bad_code.c
91 
92    int mask[1000];
93 
94    int foo(unsigned x)
95    {
96      if (x < 10)
97        x = x * 45;
98      else
99        x = x * 78;
100      return mask[x];
101    }
102    **********************************************
103 
104    $ gcc -O2 bad_code.c
105      ........
106      400315:       b8 4e 00 00 00          mov    $0x4e,%eax
107      40031a:       0f af f8                imul   %eax,%edi
108      40031d:       89 ff                   mov    %edi,%edi - useless extension
109      40031f:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
110      400326:       c3                      retq
111      ......
112      400330:       ba 2d 00 00 00          mov    $0x2d,%edx
113      400335:       0f af fa                imul   %edx,%edi
114      400338:       89 ff                   mov    %edi,%edi - useless extension
115      40033a:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
116      400341:       c3                      retq
117 
118    $ gcc -O2 -free bad_code.c
119      ......
120      400315:       6b ff 4e                imul   $0x4e,%edi,%edi
121      400318:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
122      40031f:       c3                      retq
123      400320:       6b ff 2d                imul   $0x2d,%edi,%edi
124      400323:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
125      40032a:       c3                      retq
126 
127    Motivating Example II :
128    ---------------------
129 
130    Here is an example with a conditional move.
131 
132    For this program :
133    **********************************************
134 
135    unsigned long long foo(unsigned x , unsigned y)
136    {
137      unsigned z;
138      if (x > 100)
139        z = x + y;
140      else
141        z = x - y;
142      return (unsigned long long)(z);
143    }
144 
145    $ gcc -O2 bad_code.c
146      ............
147      400360:       8d 14 3e                lea    (%rsi,%rdi,1),%edx
148      400363:       89 f8                   mov    %edi,%eax
149      400365:       29 f0                   sub    %esi,%eax
150      400367:       83 ff 65                cmp    $0x65,%edi
151      40036a:       0f 43 c2                cmovae %edx,%eax
152      40036d:       89 c0                   mov    %eax,%eax - useless extension
153      40036f:       c3                      retq
154 
155    $ gcc -O2 -free bad_code.c
156      .............
157      400360:       89 fa                   mov    %edi,%edx
158      400362:       8d 04 3e                lea    (%rsi,%rdi,1),%eax
159      400365:       29 f2                   sub    %esi,%edx
160      400367:       83 ff 65                cmp    $0x65,%edi
161      40036a:       89 d6                   mov    %edx,%esi
162      40036c:       48 0f 42 c6             cmovb  %rsi,%rax
163      400370:       c3                      retq
164 
165   Motivating Example III :
166   ---------------------
167 
168   Here is an example with a type cast.
169 
170   For this program :
171   **********************************************
172 
173   void test(int size, unsigned char *in, unsigned char *out)
174   {
175     int i;
176     unsigned char xr, xg, xy=0;
177 
178     for (i = 0; i < size; i++) {
179       xr = *in++;
180       xg = *in++;
181       xy = (unsigned char) ((19595*xr + 38470*xg) >> 16);
182       *out++ = xy;
183     }
184   }
185 
186   $ gcc -O2 bad_code.c
187     ............
188     10:   0f b6 0e                movzbl (%rsi),%ecx
189     13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
190     17:   48 83 c6 02             add    $0x2,%rsi
191     1b:   0f b6 c9                movzbl %cl,%ecx - useless extension
192     1e:   0f b6 c0                movzbl %al,%eax - useless extension
193     21:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
194     27:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
195 
196    $ gcc -O2 -free bad_code.c
197      .............
198     10:   0f b6 0e                movzbl (%rsi),%ecx
199     13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
200     17:   48 83 c6 02             add    $0x2,%rsi
201     1b:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
202     21:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
203 
204    Usefulness :
205    ----------
206 
207    The original redundant zero-extension elimination pass reported reduction
208    of the dynamic instruction count of a compression benchmark by 2.8% and
209    improvement of its run time by about 1%.
210 
211    The additional performance gain with the enhanced pass is mostly expected
212    on in-order architectures where redundancy cannot be compensated by out of
213    order execution.  Measurements showed up to 10% performance gain (reduced
214    run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance
215    gain 1%.  */
216 
217 
218 #include "config.h"
219 #include "system.h"
220 #include "coretypes.h"
221 #include "tm.h"
222 #include "rtl.h"
223 #include "tree.h"
224 #include "tm_p.h"
225 #include "flags.h"
226 #include "regs.h"
227 #include "hard-reg-set.h"
228 #include "basic-block.h"
229 #include "insn-config.h"
230 #include "function.h"
231 #include "expr.h"
232 #include "insn-attr.h"
233 #include "recog.h"
234 #include "diagnostic-core.h"
235 #include "target.h"
236 #include "optabs.h"
237 #include "insn-codes.h"
238 #include "rtlhooks-def.h"
239 #include "params.h"
240 #include "tree-pass.h"
241 #include "df.h"
242 #include "cgraph.h"
243 
244 /* This structure represents a candidate for elimination.  */
245 
246 typedef struct ext_cand
247 {
248   /* The expression.  */
249   const_rtx expr;
250 
251   /* The kind of extension.  */
252   enum rtx_code code;
253 
254   /* The destination mode.  */
255   enum machine_mode mode;
256 
257   /* The instruction where it lives.  */
258   rtx insn;
259 } ext_cand;
260 
261 
262 static int max_insn_uid;
263 
264 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
265 
266 static bool
267 update_reg_equal_equiv_notes (rtx insn, enum machine_mode new_mode,
268 			      enum machine_mode old_mode, enum rtx_code code)
269 {
270   rtx *loc = &REG_NOTES (insn);
271   while (*loc)
272     {
273       enum reg_note kind = REG_NOTE_KIND (*loc);
274       if (kind == REG_EQUAL || kind == REG_EQUIV)
275 	{
276 	  rtx orig_src = XEXP (*loc, 0);
277 	  /* Update equivalency constants.  Recall that RTL constants are
278 	     sign-extended.  */
279 	  if (GET_CODE (orig_src) == CONST_INT
280 	      && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (new_mode))
281 	    {
282 	      if (INTVAL (orig_src) >= 0 || code == SIGN_EXTEND)
283 		/* Nothing needed.  */;
284 	      else
285 		{
286 		  /* Zero-extend the negative constant by masking out the
287 		     bits outside the source mode.  */
288 		  rtx new_const_int
289 		    = gen_int_mode (INTVAL (orig_src)
290 				    & GET_MODE_MASK (old_mode),
291 				    new_mode);
292 		  if (!validate_change (insn, &XEXP (*loc, 0),
293 					new_const_int, true))
294 		    return false;
295 		}
296 	      loc = &XEXP (*loc, 1);
297 	    }
298 	  /* Drop all other notes, they assume a wrong mode.  */
299 	  else if (!validate_change (insn, loc, XEXP (*loc, 1), true))
300 	    return false;
301 	}
302       else
303 	loc = &XEXP (*loc, 1);
304     }
305   return true;
306 }
307 
308 /* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
309    and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
310    this code modifies the SET rtx to a new SET rtx that extends the
311    right hand expression into a register on the left hand side.  Note
312    that multiple assumptions are made about the nature of the set that
313    needs to be true for this to work and is called from merge_def_and_ext.
314 
315    Original :
316    (set (reg a) (expression))
317 
318    Transform :
319    (set (reg a) (any_extend (expression)))
320 
321    Special Cases :
322    If the expression is a constant or another extension, then directly
323    assign it to the register.  */
324 
325 static bool
326 combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
327 {
328   rtx orig_src = SET_SRC (*orig_set);
329   enum machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
330   rtx new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
331   rtx new_set;
332 
333   /* Merge constants by directly moving the constant into the register under
334      some conditions.  Recall that RTL constants are sign-extended.  */
335   if (GET_CODE (orig_src) == CONST_INT
336       && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (cand->mode))
337     {
338       if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND)
339 	new_set = gen_rtx_SET (VOIDmode, new_reg, orig_src);
340       else
341 	{
342 	  /* Zero-extend the negative constant by masking out the bits outside
343 	     the source mode.  */
344 	  rtx new_const_int
345 	    = GEN_INT (INTVAL (orig_src) & GET_MODE_MASK (orig_mode));
346 	  new_set = gen_rtx_SET (VOIDmode, new_reg, new_const_int);
347 	}
348     }
349   else if (GET_MODE (orig_src) == VOIDmode)
350     {
351       /* This is mostly due to a call insn that should not be optimized.  */
352       return false;
353     }
354   else if (GET_CODE (orig_src) == cand->code)
355     {
356       /* Here is a sequence of two extensions.  Try to merge them.  */
357       rtx temp_extension
358 	= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
359       rtx simplified_temp_extension = simplify_rtx (temp_extension);
360       if (simplified_temp_extension)
361         temp_extension = simplified_temp_extension;
362       new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
363     }
364   else if (GET_CODE (orig_src) == IF_THEN_ELSE)
365     {
366       /* Only IF_THEN_ELSE of phi-type copies are combined.  Otherwise,
367          in general, IF_THEN_ELSE should not be combined.  */
368       return false;
369     }
370   else
371     {
372       /* This is the normal case.  */
373       rtx temp_extension
374 	= gen_rtx_fmt_e (cand->code, cand->mode, orig_src);
375       rtx simplified_temp_extension = simplify_rtx (temp_extension);
376       if (simplified_temp_extension)
377         temp_extension = simplified_temp_extension;
378       new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
379     }
380 
381   /* This change is a part of a group of changes.  Hence,
382      validate_change will not try to commit the change.  */
383   if (validate_change (curr_insn, orig_set, new_set, true)
384       && update_reg_equal_equiv_notes (curr_insn, cand->mode, orig_mode,
385 				       cand->code))
386     {
387       if (dump_file)
388         {
389           fprintf (dump_file,
390 		   "Tentatively merged extension with definition:\n");
391           print_rtl_single (dump_file, curr_insn);
392         }
393       return true;
394     }
395 
396   return false;
397 }
398 
399 /* Treat if_then_else insns, where the operands of both branches
400    are registers, as copies.  For instance,
401    Original :
402    (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
403    Transformed :
404    (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
405    DEF_INSN is the if_then_else insn.  */
406 
407 static bool
408 transform_ifelse (ext_cand *cand, rtx def_insn)
409 {
410   rtx set_insn = PATTERN (def_insn);
411   rtx srcreg, dstreg, srcreg2;
412   rtx map_srcreg, map_dstreg, map_srcreg2;
413   rtx ifexpr;
414   rtx cond;
415   rtx new_set;
416 
417   gcc_assert (GET_CODE (set_insn) == SET);
418 
419   cond = XEXP (SET_SRC (set_insn), 0);
420   dstreg = SET_DEST (set_insn);
421   srcreg = XEXP (SET_SRC (set_insn), 1);
422   srcreg2 = XEXP (SET_SRC (set_insn), 2);
423   /* If the conditional move already has the right or wider mode,
424      there is nothing to do.  */
425   if (GET_MODE_SIZE (GET_MODE (dstreg)) >= GET_MODE_SIZE (cand->mode))
426     return true;
427 
428   map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg));
429   map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2));
430   map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg));
431   ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2);
432   new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);
433 
434   if (validate_change (def_insn, &PATTERN (def_insn), new_set, true)
435       && update_reg_equal_equiv_notes (def_insn, cand->mode, GET_MODE (dstreg),
436 				       cand->code))
437     {
438       if (dump_file)
439         {
440           fprintf (dump_file,
441 		   "Mode of conditional move instruction extended:\n");
442           print_rtl_single (dump_file, def_insn);
443         }
444       return true;
445     }
446 
447   return false;
448 }
449 
450 /* Get all the reaching definitions of an instruction.  The definitions are
451    desired for REG used in INSN.  Return the definition list or NULL if a
452    definition is missing.  If DEST is non-NULL, additionally push the INSN
453    of the definitions onto DEST.  */
454 
455 static struct df_link *
456 get_defs (rtx insn, rtx reg, vec<rtx> *dest)
457 {
458   df_ref reg_info, *uses;
459   struct df_link *ref_chain, *ref_link;
460 
461   reg_info = NULL;
462 
463   for (uses = DF_INSN_USES (insn); *uses; uses++)
464     {
465       reg_info = *uses;
466       if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG)
467         return NULL;
468       if (REGNO (DF_REF_REG (reg_info)) == REGNO (reg))
469         break;
470     }
471 
472   gcc_assert (reg_info != NULL && uses != NULL);
473 
474   ref_chain = DF_REF_CHAIN (reg_info);
475 
476   for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
477     {
478       /* Problem getting some definition for this instruction.  */
479       if (ref_link->ref == NULL)
480         return NULL;
481       if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
482         return NULL;
483     }
484 
485   if (dest)
486     for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
487       dest->safe_push (DF_REF_INSN (ref_link->ref));
488 
489   return ref_chain;
490 }
491 
492 /* Return true if INSN is
493      (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
494    and store x1 and x2 in REG_1 and REG_2.  */
495 
496 static bool
497 is_cond_copy_insn (rtx insn, rtx *reg1, rtx *reg2)
498 {
499   rtx expr = single_set (insn);
500 
501   if (expr != NULL_RTX
502       && GET_CODE (expr) == SET
503       && GET_CODE (SET_DEST (expr)) == REG
504       && GET_CODE (SET_SRC (expr))  == IF_THEN_ELSE
505       && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
506       && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
507     {
508       *reg1 = XEXP (SET_SRC (expr), 1);
509       *reg2 = XEXP (SET_SRC (expr), 2);
510       return true;
511     }
512 
513   return false;
514 }
515 
516 enum ext_modified_kind
517 {
518   /* The insn hasn't been modified by ree pass yet.  */
519   EXT_MODIFIED_NONE,
520   /* Changed into zero extension.  */
521   EXT_MODIFIED_ZEXT,
522   /* Changed into sign extension.  */
523   EXT_MODIFIED_SEXT
524 };
525 
526 struct ATTRIBUTE_PACKED ext_modified
527 {
528   /* Mode from which ree has zero or sign extended the destination.  */
529   ENUM_BITFIELD(machine_mode) mode : 8;
530 
531   /* Kind of modification of the insn.  */
532   ENUM_BITFIELD(ext_modified_kind) kind : 2;
533 
534   /* True if the insn is scheduled to be deleted.  */
535   unsigned int deleted : 1;
536 };
537 
538 /* Vectors used by combine_reaching_defs and its helpers.  */
539 typedef struct ext_state
540 {
541   /* In order to avoid constant alloc/free, we keep these
542      4 vectors live through the entire find_and_remove_re and just
543      truncate them each time.  */
544   vec<rtx> defs_list;
545   vec<rtx> copies_list;
546   vec<rtx> modified_list;
547   vec<rtx> work_list;
548 
549   /* For instructions that have been successfully modified, this is
550      the original mode from which the insn is extending and
551      kind of extension.  */
552   struct ext_modified *modified;
553 } ext_state;
554 
555 /* Reaching Definitions of the extended register could be conditional copies
556    or regular definitions.  This function separates the two types into two
557    lists, STATE->DEFS_LIST and STATE->COPIES_LIST.  This is necessary because,
558    if a reaching definition is a conditional copy, merging the extension with
559    this definition is wrong.  Conditional copies are merged by transitively
560    merging their definitions.  The defs_list is populated with all the reaching
561    definitions of the extension instruction (EXTEND_INSN) which must be merged
562    with an extension.  The copies_list contains all the conditional moves that
563    will later be extended into a wider mode conditional move if all the merges
564    are successful.  The function returns false upon failure, true upon
565    success.  */
566 
567 static bool
568 make_defs_and_copies_lists (rtx extend_insn, const_rtx set_pat,
569 			    ext_state *state)
570 {
571   rtx src_reg = XEXP (SET_SRC (set_pat), 0);
572   bool *is_insn_visited;
573   bool ret = true;
574 
575   state->work_list.truncate (0);
576 
577   /* Initialize the work list.  */
578   if (!get_defs (extend_insn, src_reg, &state->work_list))
579     gcc_unreachable ();
580 
581   is_insn_visited = XCNEWVEC (bool, max_insn_uid);
582 
583   /* Perform transitive closure for conditional copies.  */
584   while (!state->work_list.is_empty ())
585     {
586       rtx def_insn = state->work_list.pop ();
587       rtx reg1, reg2;
588 
589       gcc_assert (INSN_UID (def_insn) < max_insn_uid);
590 
591       if (is_insn_visited[INSN_UID (def_insn)])
592 	continue;
593       is_insn_visited[INSN_UID (def_insn)] = true;
594 
595       if (is_cond_copy_insn (def_insn, &reg1, &reg2))
596 	{
597 	  /* Push it onto the copy list first.  */
598 	  state->copies_list.safe_push (def_insn);
599 
600 	  /* Now perform the transitive closure.  */
601 	  if (!get_defs (def_insn, reg1, &state->work_list)
602 	      || !get_defs (def_insn, reg2, &state->work_list))
603 	    {
604 	      ret = false;
605 	      break;
606 	    }
607         }
608       else
609 	state->defs_list.safe_push (def_insn);
610     }
611 
612   XDELETEVEC (is_insn_visited);
613 
614   return ret;
615 }
616 
617 /* Merge the DEF_INSN with an extension.  Calls combine_set_extension
618    on the SET pattern.  */
619 
620 static bool
621 merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
622 {
623   enum machine_mode ext_src_mode;
624   enum rtx_code code;
625   rtx *sub_rtx;
626   rtx s_expr;
627   int i;
628 
629   ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
630   code = GET_CODE (PATTERN (def_insn));
631   sub_rtx = NULL;
632 
633   if (code == PARALLEL)
634     {
635       for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
636         {
637           s_expr = XVECEXP (PATTERN (def_insn), 0, i);
638           if (GET_CODE (s_expr) != SET)
639             continue;
640 
641           if (sub_rtx == NULL)
642             sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
643           else
644             {
645               /* PARALLEL with multiple SETs.  */
646               return false;
647             }
648         }
649     }
650   else if (code == SET)
651     sub_rtx = &PATTERN (def_insn);
652   else
653     {
654       /* It is not a PARALLEL or a SET, what could it be ? */
655       return false;
656     }
657 
658   gcc_assert (sub_rtx != NULL);
659 
660   if (REG_P (SET_DEST (*sub_rtx))
661       && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
662 	  || ((state->modified[INSN_UID (def_insn)].kind
663 	       == (cand->code == ZERO_EXTEND
664 		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
665 	      && state->modified[INSN_UID (def_insn)].mode
666 		 == ext_src_mode)))
667     {
668       if (GET_MODE_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
669 	  >= GET_MODE_SIZE (cand->mode))
670 	return true;
671       /* If def_insn is already scheduled to be deleted, don't attempt
672 	 to modify it.  */
673       if (state->modified[INSN_UID (def_insn)].deleted)
674 	return false;
675       if (combine_set_extension (cand, def_insn, sub_rtx))
676 	{
677 	  if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
678 	    state->modified[INSN_UID (def_insn)].mode = ext_src_mode;
679 	  return true;
680 	}
681     }
682 
683   return false;
684 }
685 
686 /* This function goes through all reaching defs of the source
687    of the candidate for elimination (CAND) and tries to combine
688    the extension with the definition instruction.  The changes
689    are made as a group so that even if one definition cannot be
690    merged, all reaching definitions end up not being merged.
691    When a conditional copy is encountered, merging is attempted
692    transitively on its definitions.  It returns true upon success
693    and false upon failure.  */
694 
695 static bool
696 combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
697 {
698   rtx def_insn;
699   bool merge_successful = true;
700   int i;
701   int defs_ix;
702   bool outcome;
703 
704   state->defs_list.truncate (0);
705   state->copies_list.truncate (0);
706 
707   outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
708 
709   if (!outcome)
710     return false;
711 
712   /* If cand->insn has been already modified, update cand->mode to a wider
713      mode if possible, or punt.  */
714   if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
715     {
716       enum machine_mode mode;
717       rtx set;
718 
719       if (state->modified[INSN_UID (cand->insn)].kind
720 	  != (cand->code == ZERO_EXTEND
721 	      ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)
722 	  || state->modified[INSN_UID (cand->insn)].mode != cand->mode
723 	  || (set = single_set (cand->insn)) == NULL_RTX)
724 	return false;
725       mode = GET_MODE (SET_DEST (set));
726       gcc_assert (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (cand->mode));
727       cand->mode = mode;
728     }
729 
730   merge_successful = true;
731 
732   /* Go through the defs vector and try to merge all the definitions
733      in this vector.  */
734   state->modified_list.truncate (0);
735   FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn)
736     {
737       if (merge_def_and_ext (cand, def_insn, state))
738 	state->modified_list.safe_push (def_insn);
739       else
740         {
741           merge_successful = false;
742           break;
743         }
744     }
745 
746   /* Now go through the conditional copies vector and try to merge all
747      the copies in this vector.  */
748   if (merge_successful)
749     {
750       FOR_EACH_VEC_ELT (state->copies_list, i, def_insn)
751         {
752           if (transform_ifelse (cand, def_insn))
753 	    state->modified_list.safe_push (def_insn);
754           else
755             {
756               merge_successful = false;
757               break;
758             }
759         }
760     }
761 
762   if (merge_successful)
763     {
764       /* Commit the changes here if possible
765 	 FIXME: It's an all-or-nothing scenario.  Even if only one definition
766 	 cannot be merged, we entirely give up.  In the future, we should allow
767 	 extensions to be partially eliminated along those paths where the
768 	 definitions could be merged.  */
769       if (apply_change_group ())
770         {
771           if (dump_file)
772             fprintf (dump_file, "All merges were successful.\n");
773 
774 	  FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
775 	    if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
776 	      state->modified[INSN_UID (def_insn)].kind
777 		= (cand->code == ZERO_EXTEND
778 		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT);
779 
780           return true;
781         }
782       else
783         {
784           /* Changes need not be cancelled explicitly as apply_change_group
785              does it.  Print list of definitions in the dump_file for debug
786              purposes.  This extension cannot be deleted.  */
787           if (dump_file)
788             {
789 	      fprintf (dump_file,
790 		       "Merge cancelled, non-mergeable definitions:\n");
791 	      FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
792 	        print_rtl_single (dump_file, def_insn);
793             }
794         }
795     }
796   else
797     {
798       /* Cancel any changes that have been made so far.  */
799       cancel_changes (0);
800     }
801 
802   return false;
803 }
804 
805 /* Add an extension pattern that could be eliminated.  */
806 
807 static void
808 add_removable_extension (const_rtx expr, rtx insn,
809 			 vec<ext_cand> *insn_list,
810 			 unsigned *def_map)
811 {
812   enum rtx_code code;
813   enum machine_mode mode;
814   unsigned int idx;
815   rtx src, dest;
816 
817   /* We are looking for SET (REG N) (ANY_EXTEND (REG N)).  */
818   if (GET_CODE (expr) != SET)
819     return;
820 
821   src = SET_SRC (expr);
822   code = GET_CODE (src);
823   dest = SET_DEST (expr);
824   mode = GET_MODE (dest);
825 
826   if (REG_P (dest)
827       && (code == SIGN_EXTEND || code == ZERO_EXTEND)
828       && REG_P (XEXP (src, 0))
829       && REGNO (dest) == REGNO (XEXP (src, 0)))
830     {
831       struct df_link *defs, *def;
832       ext_cand *cand;
833 
834       /* First, make sure we can get all the reaching definitions.  */
835       defs = get_defs (insn, XEXP (src, 0), NULL);
836       if (!defs)
837 	{
838 	  if (dump_file)
839 	    {
840 	      fprintf (dump_file, "Cannot eliminate extension:\n");
841 	      print_rtl_single (dump_file, insn);
842 	      fprintf (dump_file, " because of missing definition(s)\n");
843 	    }
844 	  return;
845 	}
846 
847       /* Second, make sure the reaching definitions don't feed another and
848 	 different extension.  FIXME: this obviously can be improved.  */
849       for (def = defs; def; def = def->next)
850 	if ((idx = def_map[INSN_UID(DF_REF_INSN (def->ref))])
851 	    && (cand = &(*insn_list)[idx - 1])
852 	    && cand->code != code)
853 	  {
854 	    if (dump_file)
855 	      {
856 	        fprintf (dump_file, "Cannot eliminate extension:\n");
857 		print_rtl_single (dump_file, insn);
858 	        fprintf (dump_file, " because of other extension\n");
859 	      }
860 	    return;
861 	  }
862 
863       /* Then add the candidate to the list and insert the reaching definitions
864          into the definition map.  */
865       ext_cand e = {expr, code, mode, insn};
866       insn_list->safe_push (e);
867       idx = insn_list->length ();
868 
869       for (def = defs; def; def = def->next)
870 	def_map[INSN_UID(DF_REF_INSN (def->ref))] = idx;
871     }
872 }
873 
874 /* Traverse the instruction stream looking for extensions and return the
875    list of candidates.  */
876 
877 static vec<ext_cand>
878 find_removable_extensions (void)
879 {
880   vec<ext_cand> insn_list = vNULL;
881   basic_block bb;
882   rtx insn, set;
883   unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid);
884 
885   FOR_EACH_BB (bb)
886     FOR_BB_INSNS (bb, insn)
887       {
888 	if (!NONDEBUG_INSN_P (insn))
889 	  continue;
890 
891 	set = single_set (insn);
892 	if (set == NULL_RTX)
893 	  continue;
894 	add_removable_extension (set, insn, &insn_list, def_map);
895       }
896 
897   XDELETEVEC (def_map);
898 
899   return insn_list;
900 }
901 
902 /* This is the main function that checks the insn stream for redundant
903    extensions and tries to remove them if possible.  */
904 
905 static void
906 find_and_remove_re (void)
907 {
908   ext_cand *curr_cand;
909   rtx curr_insn = NULL_RTX;
910   int num_re_opportunities = 0, num_realized = 0, i;
911   vec<ext_cand> reinsn_list;
912   vec<rtx> reinsn_del_list;
913   ext_state state;
914 
915   /* Construct DU chain to get all reaching definitions of each
916      extension instruction.  */
917   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
918   df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
919   df_analyze ();
920   df_set_flags (DF_DEFER_INSN_RESCAN);
921 
922   max_insn_uid = get_max_uid ();
923   reinsn_del_list.create (0);
924   reinsn_list = find_removable_extensions ();
925   state.defs_list.create (0);
926   state.copies_list.create (0);
927   state.modified_list.create (0);
928   state.work_list.create (0);
929   if (reinsn_list.is_empty ())
930     state.modified = NULL;
931   else
932     state.modified = XCNEWVEC (struct ext_modified, max_insn_uid);
933 
934   FOR_EACH_VEC_ELT (reinsn_list, i, curr_cand)
935     {
936       num_re_opportunities++;
937 
938       /* Try to combine the extension with the definition.  */
939       if (dump_file)
940         {
941           fprintf (dump_file, "Trying to eliminate extension:\n");
942           print_rtl_single (dump_file, curr_cand->insn);
943         }
944 
945       if (combine_reaching_defs (curr_cand, curr_cand->expr, &state))
946         {
947           if (dump_file)
948             fprintf (dump_file, "Eliminated the extension.\n");
949           num_realized++;
950           reinsn_del_list.safe_push (curr_cand->insn);
951 	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
952         }
953     }
954 
955   /* Delete all useless extensions here in one sweep.  */
956   FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
957     delete_insn (curr_insn);
958 
959   reinsn_list.release ();
960   reinsn_del_list.release ();
961   state.defs_list.release ();
962   state.copies_list.release ();
963   state.modified_list.release ();
964   state.work_list.release ();
965   XDELETEVEC (state.modified);
966 
967   if (dump_file && num_re_opportunities > 0)
968     fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
969 	     num_re_opportunities, num_realized);
970 
971   df_finish_pass (false);
972 }
973 
974 /* Find and remove redundant extensions.  */
975 
976 static unsigned int
977 rest_of_handle_ree (void)
978 {
979   timevar_push (TV_REE);
980   find_and_remove_re ();
981   timevar_pop (TV_REE);
982   return 0;
983 }
984 
985 /* Run REE pass when flag_ree is set at optimization level > 0.  */
986 
987 static bool
988 gate_handle_ree (void)
989 {
990   return (optimize > 0 && flag_ree);
991 }
992 
993 struct rtl_opt_pass pass_ree =
994 {
995  {
996   RTL_PASS,
997   "ree",                                /* name */
998   OPTGROUP_NONE,                        /* optinfo_flags */
999   gate_handle_ree,                      /* gate */
1000   rest_of_handle_ree,                   /* execute */
1001   NULL,                                 /* sub */
1002   NULL,                                 /* next */
1003   0,                                    /* static_pass_number */
1004   TV_REE,                               /* tv_id */
1005   0,                                    /* properties_required */
1006   0,                                    /* properties_provided */
1007   0,                                    /* properties_destroyed */
1008   0,                                    /* todo_flags_start */
1009   TODO_ggc_collect |
1010   TODO_verify_rtl_sharing,              /* todo_flags_finish */
1011  }
1012 };
1013