/* The Blackfin code generation auxiliary output file.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Analog Devices.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "varasm.h"
#include "calls.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "target.h"
#include "target-def.h"
#include "hashtab.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "optabs.h"
#include "ggc.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "hash-map.h"
#include "is-a.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "langhooks.h"
#include "bfin-protos.h"
#include "tm_p.h"
#include "tm-preds.h"
#include "tm-constrs.h"
#include "gt-bfin.h"
#include "timevar.h"
#include "df.h"
#include "sel-sched.h"
#include "hw-doloop.h"
#include "opts.h"
#include "dumpfile.h"
#include "builtins.h"

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
struct GTY(()) machine_function
{
  /* Set if we are notified by the doloop pass that a hardware loop
     was created.  */
  int has_hardware_loops;

  /* Set if we create a memcpy pattern that uses loop registers.  */
  int has_loopreg_clobber;
};

/* RTX for the condition code flag register and the RETS register.  */
extern GTY(()) rtx bfin_cc_rtx;
extern GTY(()) rtx bfin_rets_rtx;
rtx bfin_cc_rtx, bfin_rets_rtx;

int max_arg_registers = 0;

/* Arrays used when emitting register names.  */
const char *short_reg_names[]  =  SHORT_REGISTER_NAMES;
const char *high_reg_names[]   =  HIGH_REGISTER_NAMES;
const char *dregs_pair_names[] =  DREGS_PAIR_NAMES;
const char *byte_reg_names[]   =  BYTE_REGISTER_NAMES;

static int arg_regs[] = FUNCTION_ARG_REGISTERS;
static int ret_regs[] = FUNCTION_RETURN_REGISTERS;

int splitting_for_sched, splitting_loops;

static void
bfin_globalize_label (FILE *stream, const char *name)
{
  fputs (".global ", stream);
  assemble_name (stream, name);
  fputc (';', stream);
  fputc ('\n', stream);
}

static void
output_file_start (void)
{
  FILE *file = asm_out_file;
  int i;

  fprintf (file, ".file \"%s\";\n", LOCATION_FILE (input_location));

  for (i = 0; arg_regs[i] >= 0; i++)
    ;
  max_arg_registers = i;	/* Number of argument registers used.  */
}

/* Examine the machine-dependent attributes of function type FUNTYPE and
   return its kind.  See the definition of E_FUNKIND.  */

static e_funkind
funkind (const_tree funtype)
{
  tree attrs = TYPE_ATTRIBUTES (funtype);
  if (lookup_attribute ("interrupt_handler", attrs))
    return INTERRUPT_HANDLER;
  else if (lookup_attribute ("exception_handler", attrs))
    return EXCPT_HANDLER;
  else if (lookup_attribute ("nmi_handler", attrs))
    return NMI_HANDLER;
  else
    return SUBROUTINE;
}
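
/* Illustrative note (not part of the original source): the attributes
   tested above come from user declarations such as

     void isr (void) __attribute__ ((interrupt_handler));

   for which funkind returns INTERRUPT_HANDLER, selecting the interrupt
   prologue/epilogue expansion further below.  */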

/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  PICREG is the register holding the pointer to the PIC offset
   table.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg, rtx picreg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

  if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
    {
      int unspec;
      rtx tmp;

      if (TARGET_ID_SHARED_LIBRARY)
	unspec = UNSPEC_MOVE_PIC;
      else if (GET_CODE (addr) == SYMBOL_REF
	       && SYMBOL_REF_FUNCTION_P (addr))
	unspec = UNSPEC_FUNCDESC_GOT17M4;
      else
	unspec = UNSPEC_MOVE_FDPIC;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec);
      new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));

      emit_move_insn (reg, new_rtx);
      if (picreg == pic_offset_table_rtx)
	crtl->uses_pic_offset_table = 1;
      return reg;
    }

  else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS)
    {
      rtx base;

      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (XEXP (addr, 0) == picreg)
	return orig;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      base = legitimize_pic_address (XEXP (addr, 0), reg, picreg);
      addr = legitimize_pic_address (XEXP (addr, 1),
				     base == reg ? NULL_RTX : reg,
				     picreg);

      if (GET_CODE (addr) == CONST_INT)
	{
	  gcc_assert (! reload_in_progress && ! reload_completed);
	  addr = force_reg (Pmode, addr);
	}

      if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1)))
	{
	  base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0));
	  addr = XEXP (addr, 1);
	}

      return gen_rtx_PLUS (Pmode, base, addr);
    }

  return new_rtx;
}
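
/* Illustrative sketch (not from the original source): for a plain symbol,
   the code above rewrites

     (symbol_ref "foo")

   into a GOT load of roughly the shape

     (set (reg)
          (mem (plus picreg (unspec [(symbol_ref "foo")] UNSPEC_MOVE_PIC))))

   which print_operand below renders with an "@GOT" (or, for FDPIC,
   "@GOT17M4"/"@FUNCDESC_GOT17M4") relocation suffix.  */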

/* Stack frame layout. */

/* For a given REGNO, determine whether it must be saved in the function
   prologue.  IS_INTHANDLER specifies whether we're generating a normal
   prologue or an interrupt/exception one.  */
static bool
must_save_p (bool is_inthandler, unsigned regno)
{
  if (D_REGNO_P (regno))
    {
      bool is_eh_return_reg = false;
      if (crtl->calls_eh_return)
	{
	  unsigned j;
	  for (j = 0; ; j++)
	    {
	      unsigned test = EH_RETURN_DATA_REGNO (j);
	      if (test == INVALID_REGNUM)
		break;
	      if (test == regno)
		is_eh_return_reg = true;
	    }
	}

      return (is_eh_return_reg
	      || (df_regs_ever_live_p (regno)
		  && !fixed_regs[regno]
		  && (is_inthandler || !call_used_regs[regno])));
    }
  else if (P_REGNO_P (regno))
    {
      return ((df_regs_ever_live_p (regno)
	       && !fixed_regs[regno]
	       && (is_inthandler || !call_used_regs[regno]))
	      || (is_inthandler
		  && (ENABLE_WA_05000283 || ENABLE_WA_05000315)
		  && regno == REG_P5)
	      || (!TARGET_FDPIC
		  && regno == PIC_OFFSET_TABLE_REGNUM
		  && (crtl->uses_pic_offset_table
		      || (TARGET_ID_SHARED_LIBRARY && !crtl->is_leaf))));
    }
  else
    return ((is_inthandler || !call_used_regs[regno])
	    && (df_regs_ever_live_p (regno)
		|| (!leaf_function_p () && call_used_regs[regno])));
}

/* Compute the number of DREGS to save with a push_multiple operation.
   This could include registers that aren't modified in the function,
   since push_multiple only takes a range of registers.
   If IS_INTHANDLER, then everything that is live must be saved, even
   if normally call-clobbered.
   If CONSECUTIVE, return the number of registers we can save in one
   instruction with a push/pop multiple instruction.  */

static int
n_dregs_to_save (bool is_inthandler, bool consecutive)
{
  int count = 0;
  unsigned i;

  for (i = REG_R7 + 1; i-- != REG_R0;)
    {
      if (must_save_p (is_inthandler, i))
	count++;
      else if (consecutive)
	return count;
    }
  return count;
}
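
/* Worked example (illustrative): if must_save_p holds for R4, R6 and R7
   only, n_dregs_to_save returns 3 with CONSECUTIVE false, but 2 with
   CONSECUTIVE true: the scan down from R7 stops at R5, since a push
   multiple can only cover the contiguous range R7-R6.  */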

/* Like n_dregs_to_save, but compute number of PREGS to save.  */

static int
n_pregs_to_save (bool is_inthandler, bool consecutive)
{
  int count = 0;
  unsigned i;

  for (i = REG_P5 + 1; i-- != REG_P0;)
    if (must_save_p (is_inthandler, i))
      count++;
    else if (consecutive)
      return count;
  return count;
}

/* Determine if we are going to save the frame pointer in the prologue.  */

static bool
must_save_fp_p (void)
{
  return df_regs_ever_live_p (REG_FP);
}

/* Determine if we are going to save the RETS register.  */
static bool
must_save_rets_p (void)
{
  return df_regs_ever_live_p (REG_RETS);
}

static bool
stack_frame_needed_p (void)
{
  /* EH return puts a new return address into the frame using an
     address relative to the frame pointer.  */
  if (crtl->calls_eh_return)
    return true;
  return frame_pointer_needed;
}

/* Emit code to save registers in the prologue.  SAVEALL is nonzero if we
   must save all registers; this is used for interrupt handlers.
   SPREG contains (reg:SI REG_SP).  IS_INTHANDLER is true if we're doing
   this for an interrupt (or exception) handler.  */

static void
expand_prologue_reg_save (rtx spreg, int saveall, bool is_inthandler)
{
  rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg);
  rtx predec = gen_rtx_MEM (SImode, predec1);
  int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false);
  int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false);
  int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true);
  int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true);
  int dregno, pregno;
  int total_consec = ndregs_consec + npregs_consec;
  int i, d_to_save;

  if (saveall || is_inthandler)
    {
      rtx_insn *insn = emit_move_insn (predec, gen_rtx_REG (SImode, REG_ASTAT));

      RTX_FRAME_RELATED_P (insn) = 1;
      for (dregno = REG_LT0; dregno <= REG_LB1; dregno++)
	if (! crtl->is_leaf
	    || cfun->machine->has_hardware_loops
	    || cfun->machine->has_loopreg_clobber
	    || (ENABLE_WA_05000257
		&& (dregno == REG_LC0 || dregno == REG_LC1)))
	  {
	    insn = emit_move_insn (predec, gen_rtx_REG (SImode, dregno));
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }

  if (total_consec != 0)
    {
      rtx_insn *insn;
      rtx val = GEN_INT (-total_consec * 4);
      rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 2));

      XVECEXP (pat, 0, 0) = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, val),
					    UNSPEC_PUSH_MULTIPLE);
      XVECEXP (pat, 0, total_consec + 1) = gen_rtx_SET (VOIDmode, spreg,
							gen_rtx_PLUS (Pmode,
								      spreg,
								      val));
      RTX_FRAME_RELATED_P (XVECEXP (pat, 0, total_consec + 1)) = 1;
      d_to_save = ndregs_consec;
      dregno = REG_R7 + 1 - ndregs_consec;
      pregno = REG_P5 + 1 - npregs_consec;
      for (i = 0; i < total_consec; i++)
	{
	  rtx memref = gen_rtx_MEM (word_mode,
				    gen_rtx_PLUS (Pmode, spreg,
						  GEN_INT (- i * 4 - 4)));
	  rtx subpat;
	  if (d_to_save > 0)
	    {
	      subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode,
								   dregno++));
	      d_to_save--;
	    }
	  else
	    {
	      subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode,
								   pregno++));
	    }
	  XVECEXP (pat, 0, i + 1) = subpat;
	  RTX_FRAME_RELATED_P (subpat) = 1;
	}
      insn = emit_insn (pat);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  for (dregno = REG_R0; ndregs != ndregs_consec; dregno++)
    {
      if (must_save_p (is_inthandler, dregno))
	{
	  rtx_insn *insn =
	    emit_move_insn (predec, gen_rtx_REG (word_mode, dregno));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  ndregs--;
	}
    }
  for (pregno = REG_P0; npregs != npregs_consec; pregno++)
    {
      if (must_save_p (is_inthandler, pregno))
	{
	  rtx_insn *insn =
	    emit_move_insn (predec, gen_rtx_REG (word_mode, pregno));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  npregs--;
	}
    }
  for (i = REG_P7 + 1; i < REG_CC; i++)
    if (saveall
	|| (is_inthandler
	    && (df_regs_ever_live_p (i)
		|| (!leaf_function_p () && call_used_regs[i]))))
      {
	rtx_insn *insn;
	if (i == REG_A0 || i == REG_A1)
	  insn = emit_move_insn (gen_rtx_MEM (PDImode, predec1),
				 gen_rtx_REG (PDImode, i));
	else
	  insn = emit_move_insn (predec, gen_rtx_REG (SImode, i));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
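
/* Illustrative sketch (assumed assembly syntax, not from the original
   source): when R5..R7 and P4..P5 form the consecutive ranges computed
   above, the PARALLEL is emitted as a single push multiple such as

     [--SP] = (R7:5, P5:4);

   and any registers outside the consecutive ranges are saved by the
   individual [--SP] = reg; moves that follow.  */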

/* Emit code to restore registers in the epilogue.  SAVEALL is nonzero if we
   must save all registers; this is used for interrupt handlers.
   SPREG contains (reg:SI REG_SP).  IS_INTHANDLER is true if we're doing
   this for an interrupt (or exception) handler.  */

static void
expand_epilogue_reg_restore (rtx spreg, bool saveall, bool is_inthandler)
{
  rtx postinc1 = gen_rtx_POST_INC (SImode, spreg);
  rtx postinc = gen_rtx_MEM (SImode, postinc1);

  int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false);
  int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false);
  int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true);
  int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true);
  int total_consec = ndregs_consec + npregs_consec;
  int i, regno;
  rtx_insn *insn;

  /* A slightly crude technique to stop flow from trying to delete "dead"
     insns.  */
  MEM_VOLATILE_P (postinc) = 1;

  for (i = REG_CC - 1; i > REG_P7; i--)
    if (saveall
	|| (is_inthandler
	    && (df_regs_ever_live_p (i)
		|| (!leaf_function_p () && call_used_regs[i]))))
      {
	if (i == REG_A0 || i == REG_A1)
	  {
	    rtx mem = gen_rtx_MEM (PDImode, postinc1);
	    MEM_VOLATILE_P (mem) = 1;
	    emit_move_insn (gen_rtx_REG (PDImode, i), mem);
	  }
	else
	  emit_move_insn (gen_rtx_REG (SImode, i), postinc);
      }

  regno = REG_P5 - npregs_consec;
  for (; npregs != npregs_consec; regno--)
    {
      if (must_save_p (is_inthandler, regno))
	{
	  emit_move_insn (gen_rtx_REG (word_mode, regno), postinc);
	  npregs--;
	}
    }
  regno = REG_R7 - ndregs_consec;
  for (; ndregs != ndregs_consec; regno--)
    {
      if (must_save_p (is_inthandler, regno))
	{
	  emit_move_insn (gen_rtx_REG (word_mode, regno), postinc);
	  ndregs--;
	}
    }

  if (total_consec != 0)
    {
      rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 1));
      XVECEXP (pat, 0, 0)
	= gen_rtx_SET (VOIDmode, spreg,
		       gen_rtx_PLUS (Pmode, spreg,
				     GEN_INT (total_consec * 4)));

      if (npregs_consec > 0)
	regno = REG_P5 + 1;
      else
	regno = REG_R7 + 1;

      for (i = 0; i < total_consec; i++)
	{
	  rtx addr = (i > 0
		      ? gen_rtx_PLUS (Pmode, spreg, GEN_INT (i * 4))
		      : spreg);
	  rtx memref = gen_rtx_MEM (word_mode, addr);

	  regno--;
	  XVECEXP (pat, 0, i + 1)
	    = gen_rtx_SET (VOIDmode, gen_rtx_REG (word_mode, regno), memref);

	  if (npregs_consec > 0)
	    {
	      if (--npregs_consec == 0)
		regno = REG_R7 + 1;
	    }
	}

      insn = emit_insn (pat);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (saveall || is_inthandler)
    {
      for (regno = REG_LB1; regno >= REG_LT0; regno--)
	if (! crtl->is_leaf
	    || cfun->machine->has_hardware_loops
	    || cfun->machine->has_loopreg_clobber
	    || (ENABLE_WA_05000257 && (regno == REG_LC0 || regno == REG_LC1)))
	  emit_move_insn (gen_rtx_REG (SImode, regno), postinc);

      emit_move_insn (gen_rtx_REG (SImode, REG_ASTAT), postinc);
    }
}

/* Perform any actions needed for a function that receives a variable
   number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prologue to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.

   Blackfin specific:
   - The VDSP C compiler manual (our ABI) says that a variable args function
     should save the R0, R1 and R2 registers in the stack.
   - The caller will always leave space on the stack for the
     arguments that are passed in registers, so we don't have
     to leave any extra space.
   - Now, the va_start pointer can access all arguments from the stack.  */

static void
setup_incoming_varargs (cumulative_args_t cum,
			machine_mode mode ATTRIBUTE_UNUSED,
			tree type ATTRIBUTE_UNUSED, int *pretend_size,
			int no_rtl)
{
  rtx mem;
  int i;

  if (no_rtl)
    return;

  /* The move for named arguments will be generated automatically by the
     compiler.  We need to generate the move rtx for the unnamed arguments
     if they are in the first 3 words.  We assume at least 1 named argument
     exists, so we never generate [ARGP] = R0 here.  */

  for (i = get_cumulative_args (cum)->words + 1; i < max_arg_registers; i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (Pmode, arg_pointer_rtx,
					(i * UNITS_PER_WORD)));
      emit_move_insn (mem, gen_rtx_REG (Pmode, i));
    }

  *pretend_size = 0;
}
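
/* Worked example (illustrative, assuming the hook runs before CUM is
   advanced past the last named argument, which is why the loop starts at
   words + 1): for

     int f (const char *fmt, ...);

   the loop above stores R1 at [ARGP + 4] and R2 at [ARGP + 8]; since the
   caller already left stack space for the register arguments, va_arg can
   then walk every argument through the stack.  */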

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
bfin_frame_pointer_required (void)
{
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));

  if (fkind != SUBROUTINE)
    return true;

  /* We turn on -fomit-frame-pointer if -momit-leaf-frame-pointer is used,
     so we have to override it for non-leaf functions.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER && ! crtl->is_leaf)
    return true;

  return false;
}

/* Return the number of registers pushed during the prologue.  */

static int
n_regs_saved_by_prologue (void)
{
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  bool is_inthandler = fkind != SUBROUTINE;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  bool all = (lookup_attribute ("saveall", attrs) != NULL_TREE
	      || (is_inthandler && !crtl->is_leaf));
  int ndregs = all ? 8 : n_dregs_to_save (is_inthandler, false);
  int npregs = all ? 6 : n_pregs_to_save (is_inthandler, false);
  int n = ndregs + npregs;
  int i;

  if (all || stack_frame_needed_p ())
    n += 2;
  else
    {
      if (must_save_fp_p ())
	n++;
      if (must_save_rets_p ())
	n++;
    }

  if (fkind != SUBROUTINE || all)
    {
      /* Increment once for ASTAT.  */
      n++;
      if (! crtl->is_leaf
	  || cfun->machine->has_hardware_loops
	  || cfun->machine->has_loopreg_clobber)
	{
	  n += 6;
	}
    }

  if (fkind != SUBROUTINE)
    {
      /* RETE/X/N.  */
      if (lookup_attribute ("nesting", attrs))
	n++;
    }

  for (i = REG_P7 + 1; i < REG_CC; i++)
    if (all
	|| (fkind != SUBROUTINE
	    && (df_regs_ever_live_p (i)
		|| (!leaf_function_p () && call_used_regs[i]))))
      n += i == REG_A0 || i == REG_A1 ? 2 : 1;

  return n;
}

/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All other eliminations are valid.  */

static bool
bfin_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true);
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
bfin_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset = 0;

  if (from == ARG_POINTER_REGNUM)
    offset = n_regs_saved_by_prologue () * 4;

  if (to == STACK_POINTER_REGNUM)
    {
      if (crtl->outgoing_args_size >= FIXED_STACK_AREA)
	offset += crtl->outgoing_args_size;
      else if (crtl->outgoing_args_size)
	offset += FIXED_STACK_AREA;

      offset += get_frame_size ();
    }

  return offset;
}
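
/* Worked example (illustrative): with 3 registers saved by the prologue,
   the ARG_POINTER sits 12 bytes above the FRAME_POINTER.  Eliminating to
   the STACK_POINTER additionally adds the local frame size and the
   outgoing argument area, which is rounded up to FIXED_STACK_AREA
   whenever any outgoing arguments exist at all.  */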

/* Emit code to load a constant CONSTANT into register REG; setting
   RTX_FRAME_RELATED_P on all insns we generate if RELATED is true.
   Make sure that the insns we generate need not be split.  */

static void
frame_related_constant_load (rtx reg, HOST_WIDE_INT constant, bool related)
{
  rtx_insn *insn;
  rtx cst = GEN_INT (constant);

  if (constant >= -32768 && constant < 65536)
    insn = emit_move_insn (reg, cst);
  else
    {
      /* We don't call split_load_immediate here, since dwarf2out.c can get
	 confused about some of the more clever sequences it can generate.  */
      insn = emit_insn (gen_movsi_high (reg, cst));
      if (related)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_movsi_low (reg, reg, cst));
    }
  if (related)
    RTX_FRAME_RELATED_P (insn) = 1;
}
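
/* Illustrative sketch (assumed assembly, not from the original source):
   a constant outside the [-32768, 65536) range, say 0x12345678 into P1,
   comes out as the two-insn sequence

     P1.H = 0x1234;
     P1.L = 0x5678;

   which needs no later splitting and keeps dwarf2out's frame analysis
   simple.  */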

/* Generate efficient code to add a value to a P register.
   Set RTX_FRAME_RELATED_P on the generated insns if FRAME is nonzero.
   EPILOGUE_P is zero if called for a prologue, nonzero for an epilogue,
   and negative for a sibcall epilogue.  */

static void
add_to_reg (rtx reg, HOST_WIDE_INT value, int frame, int epilogue_p)
{
  if (value == 0)
    return;

  /* Choose whether to use a sequence using a temporary register, or
     a sequence with multiple adds.  We can add a signed 7-bit value
     in one instruction.  */
  if (value > 120 || value < -120)
    {
      rtx tmpreg;
      rtx tmpreg2;
      rtx_insn *insn;

      tmpreg2 = NULL_RTX;

      /* For a prologue or normal epilogue, P1 can be safely used
	 as the temporary register.  For a sibcall epilogue, we try to find
	 a callee-saved P register, which will be restored in the epilogue.
	 If we cannot find such a P register, we have to use one I register
	 to help us.  */

      if (epilogue_p >= 0)
	tmpreg = gen_rtx_REG (SImode, REG_P1);
      else
	{
	  int i;
	  for (i = REG_P0; i <= REG_P5; i++)
	    if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
		|| (!TARGET_FDPIC
		    && i == PIC_OFFSET_TABLE_REGNUM
		    && (crtl->uses_pic_offset_table
			|| (TARGET_ID_SHARED_LIBRARY
			    && ! crtl->is_leaf))))
	      break;
	  if (i <= REG_P5)
	    tmpreg = gen_rtx_REG (SImode, i);
	  else
	    {
	      tmpreg = gen_rtx_REG (SImode, REG_P1);
	      tmpreg2 = gen_rtx_REG (SImode, REG_I0);
	      emit_move_insn (tmpreg2, tmpreg);
	    }
	}

      if (frame)
	frame_related_constant_load (tmpreg, value, TRUE);
      else
	insn = emit_move_insn (tmpreg, GEN_INT (value));

      insn = emit_insn (gen_addsi3 (reg, reg, tmpreg));
      if (frame)
	RTX_FRAME_RELATED_P (insn) = 1;

      if (tmpreg2 != NULL_RTX)
	emit_move_insn (tmpreg, tmpreg2);
    }
  else
    do
      {
	int size = value;
	rtx_insn *insn;

	if (size > 60)
	  size = 60;
	else if (size < -60)
	  /* We could use -62, but that would leave the stack unaligned, so
	     it's no good.  */
	  size = -60;

	insn = emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
	if (frame)
	  RTX_FRAME_RELATED_P (insn) = 1;
	value -= size;
      }
    while (value != 0);
}

/* Generate a LINK insn for a frame sized FRAME_SIZE.  If this constant
   is too large, generate a sequence of insns that has the same effect.
   SPREG contains (reg:SI REG_SP).  */

static void
emit_link_insn (rtx spreg, HOST_WIDE_INT frame_size)
{
  HOST_WIDE_INT link_size = frame_size;
  rtx_insn *insn;
  int i;

  if (link_size > 262140)
    link_size = 262140;

  /* Use a LINK insn with as big a constant as possible, then subtract
     any remaining size from the SP.  */
  insn = emit_insn (gen_link (GEN_INT (-8 - link_size)));
  RTX_FRAME_RELATED_P (insn) = 1;

  for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
    {
      rtx set = XVECEXP (PATTERN (insn), 0, i);
      gcc_assert (GET_CODE (set) == SET);
      RTX_FRAME_RELATED_P (set) = 1;
    }

  frame_size -= link_size;

  if (frame_size > 0)
    {
      /* Must use a call-clobbered PREG that isn't the static chain.  */
      rtx tmpreg = gen_rtx_REG (Pmode, REG_P1);

      frame_related_constant_load (tmpreg, -frame_size, TRUE);
      insn = emit_insn (gen_addsi3 (spreg, spreg, tmpreg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
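
/* Worked example (illustrative): a 300000-byte frame exceeds the LINK
   insn's 262140-byte limit, so the code above emits LINK 262140 and then
   loads -37860 into P1 with frame_related_constant_load and adds it to
   SP.  */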

/* Return the number of bytes we must reserve for outgoing arguments
   in the current function's stack frame.  */

static HOST_WIDE_INT
arg_area_size (void)
{
  if (crtl->outgoing_args_size)
    {
      if (crtl->outgoing_args_size >= FIXED_STACK_AREA)
	return crtl->outgoing_args_size;
      else
	return FIXED_STACK_AREA;
    }
  return 0;
}

/* Save RETS and FP, and allocate a stack frame.  ALL is true if the
   function must save all its registers (true only for certain interrupt
   handlers).  */

static void
do_link (rtx spreg, HOST_WIDE_INT frame_size, bool all)
{
  frame_size += arg_area_size ();

  if (all
      || stack_frame_needed_p ()
      || (must_save_rets_p () && must_save_fp_p ()))
    emit_link_insn (spreg, frame_size);
  else
    {
      if (must_save_rets_p ())
	{
	  rtx pat = gen_movsi (gen_rtx_MEM (Pmode,
					    gen_rtx_PRE_DEC (Pmode, spreg)),
			       bfin_rets_rtx);
	  rtx_insn *insn = emit_insn (pat);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      if (must_save_fp_p ())
	{
	  rtx pat = gen_movsi (gen_rtx_MEM (Pmode,
					    gen_rtx_PRE_DEC (Pmode, spreg)),
			       gen_rtx_REG (Pmode, REG_FP));
	  rtx_insn *insn = emit_insn (pat);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      add_to_reg (spreg, -frame_size, 1, 0);
    }
}

/* Like do_link, but used for epilogues to deallocate the stack frame.
   EPILOGUE_P is zero if called for a prologue, nonzero for an epilogue,
   and negative for a sibcall epilogue.  */

static void
do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all, int epilogue_p)
{
  frame_size += arg_area_size ();

  if (stack_frame_needed_p ())
    emit_insn (gen_unlink ());
  else
    {
      rtx postinc = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, spreg));

      add_to_reg (spreg, frame_size, 0, epilogue_p);
      if (all || must_save_fp_p ())
	{
	  rtx fpreg = gen_rtx_REG (Pmode, REG_FP);
	  emit_move_insn (fpreg, postinc);
	  emit_use (fpreg);
	}
      if (all || must_save_rets_p ())
	{
	  emit_move_insn (bfin_rets_rtx, postinc);
	  emit_use (bfin_rets_rtx);
	}
    }
}

/* Generate a prologue suitable for a function of kind FKIND.  This is
   called for interrupt and exception handler prologues.
   SPREG contains (reg:SI REG_SP).  */

static void
expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind, bool all)
{
  HOST_WIDE_INT frame_size = get_frame_size ();
  rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg);
  rtx predec = gen_rtx_MEM (SImode, predec1);
  rtx_insn *insn;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  tree kspisusp = lookup_attribute ("kspisusp", attrs);

  if (kspisusp)
    {
      insn = emit_move_insn (spreg, gen_rtx_REG (Pmode, REG_USP));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* We need space on the stack in case we need to save the argument
     registers.  */
  if (fkind == EXCPT_HANDLER)
    {
      insn = emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (-12)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If we're calling other functions, they won't save their call-clobbered
     registers, so we must save everything here.  */
  if (!crtl->is_leaf)
    all = true;
  expand_prologue_reg_save (spreg, all, true);

  if (ENABLE_WA_05000283 || ENABLE_WA_05000315)
    {
      rtx chipid = GEN_INT (trunc_int_for_mode (0xFFC00014, SImode));
      rtx p5reg = gen_rtx_REG (Pmode, REG_P5);
      emit_insn (gen_movbi (bfin_cc_rtx, const1_rtx));
      emit_insn (gen_movsi_high (p5reg, chipid));
      emit_insn (gen_movsi_low (p5reg, p5reg, chipid));
      emit_insn (gen_dummy_load (p5reg, bfin_cc_rtx));
    }

  if (lookup_attribute ("nesting", attrs))
    {
      rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
      insn = emit_move_insn (predec, srcreg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  do_link (spreg, frame_size, all);

  if (fkind == EXCPT_HANDLER)
    {
      rtx r0reg = gen_rtx_REG (SImode, REG_R0);
      rtx r1reg = gen_rtx_REG (SImode, REG_R1);
      rtx r2reg = gen_rtx_REG (SImode, REG_R2);

      emit_move_insn (r0reg, gen_rtx_REG (SImode, REG_SEQSTAT));
      emit_insn (gen_ashrsi3 (r0reg, r0reg, GEN_INT (26)));
      emit_insn (gen_ashlsi3 (r0reg, r0reg, GEN_INT (26)));
      emit_move_insn (r1reg, spreg);
      emit_move_insn (r2reg, gen_rtx_REG (Pmode, REG_FP));
      emit_insn (gen_addsi3 (r2reg, r2reg, GEN_INT (8)));
    }
}

/* Generate an epilogue suitable for a function of kind FKIND.  This is
   called for interrupt and exception handler epilogues.
   SPREG contains (reg:SI REG_SP).  */

static void
expand_interrupt_handler_epilogue (rtx spreg, e_funkind fkind, bool all)
{
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  rtx postinc1 = gen_rtx_POST_INC (SImode, spreg);
  rtx postinc = gen_rtx_MEM (SImode, postinc1);

  /* A slightly crude technique to stop flow from trying to delete "dead"
     insns.  */
  MEM_VOLATILE_P (postinc) = 1;

  do_unlink (spreg, get_frame_size (), all, 1);

  if (lookup_attribute ("nesting", attrs))
    {
      rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
      emit_move_insn (srcreg, postinc);
    }

  /* If we're calling other functions, they won't save their call-clobbered
     registers, so we must save (and restore) everything here.  */
  if (!crtl->is_leaf)
    all = true;

  expand_epilogue_reg_restore (spreg, all, true);

  /* Deallocate any space we left on the stack in case we needed to save the
     argument registers.  */
  if (fkind == EXCPT_HANDLER)
    emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (12)));

  emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, ret_regs[fkind])));
}

/* Used while emitting the prologue to generate code to load the correct value
   into the PIC register, which is passed in DEST.  */

static rtx
bfin_load_pic_reg (rtx dest)
{
  struct cgraph_local_info *i = NULL;
  rtx addr;

  i = cgraph_node::local_info (current_function_decl);

  /* Functions local to the translation unit don't need to reload the
     pic reg, since the caller always passes a usable one.  */
  if (i && i->local)
    return pic_offset_table_rtx;

  if (global_options_set.x_bfin_library_id)
    addr = plus_constant (Pmode, pic_offset_table_rtx,
			  -4 - bfin_library_id * 4);
  else
    addr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
			 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					 UNSPEC_LIBRARY_OFFSET));
  emit_insn (gen_movsi (dest, gen_rtx_MEM (Pmode, addr)));
  return dest;
}

/* Generate RTL for the prologue of the current function.  */

void
bfin_expand_prologue (void)
{
  HOST_WIDE_INT frame_size = get_frame_size ();
  rtx spreg = gen_rtx_REG (Pmode, REG_SP);
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  rtx pic_reg_loaded = NULL_RTX;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  bool all = lookup_attribute ("saveall", attrs) != NULL_TREE;

  if (fkind != SUBROUTINE)
    {
      expand_interrupt_handler_prologue (spreg, fkind, all);
      return;
    }

  if (crtl->limit_stack
      || (TARGET_STACK_CHECK_L1
	  && !DECL_NO_LIMIT_STACK (current_function_decl)))
    {
      HOST_WIDE_INT offset
	= bfin_initial_elimination_offset (ARG_POINTER_REGNUM,
					   STACK_POINTER_REGNUM);
      rtx lim = crtl->limit_stack ? stack_limit_rtx : NULL_RTX;
      rtx tmp = gen_rtx_REG (Pmode, REG_R3);
      rtx p2reg = gen_rtx_REG (Pmode, REG_P2);

      emit_move_insn (tmp, p2reg);
      if (!lim)
	{
	  emit_move_insn (p2reg, gen_int_mode (0xFFB00000, SImode));
	  emit_move_insn (p2reg, gen_rtx_MEM (Pmode, p2reg));
	  lim = p2reg;
	}
      if (GET_CODE (lim) == SYMBOL_REF)
	{
	  if (TARGET_ID_SHARED_LIBRARY)
	    {
	      rtx p1reg = gen_rtx_REG (Pmode, REG_P1);
	      rtx val;
	      pic_reg_loaded = bfin_load_pic_reg (p2reg);
	      val = legitimize_pic_address (stack_limit_rtx, p1reg,
					    pic_reg_loaded);
	      emit_move_insn (p1reg, val);
	      frame_related_constant_load (p2reg, offset, FALSE);
	      emit_insn (gen_addsi3 (p2reg, p2reg, p1reg));
	      lim = p2reg;
	    }
	  else
	    {
	      rtx limit = plus_constant (Pmode, lim, offset);
	      emit_move_insn (p2reg, limit);
	      lim = p2reg;
	    }
	}
      else
	{
	  if (lim != p2reg)
	    emit_move_insn (p2reg, lim);
	  add_to_reg (p2reg, offset, 0, 0);
	  lim = p2reg;
	}
      emit_insn (gen_compare_lt (bfin_cc_rtx, spreg, lim));
      emit_insn (gen_trapifcc ());
      emit_move_insn (p2reg, tmp);
    }
  expand_prologue_reg_save (spreg, all, false);

  do_link (spreg, frame_size, all);

  if (TARGET_ID_SHARED_LIBRARY
      && !TARGET_SEP_DATA
      && (crtl->uses_pic_offset_table
	  || !crtl->is_leaf))
    bfin_load_pic_reg (pic_offset_table_rtx);
}

/* Generate RTL for the epilogue of the current function.  NEED_RETURN is zero
   if this is for a sibcall.  EH_RETURN is nonzero if we're expanding an
   eh_return pattern.  SIBCALL_P is true if this is a sibcall epilogue,
   false otherwise.  */

void
bfin_expand_epilogue (int need_return, int eh_return, bool sibcall_p)
{
  rtx spreg = gen_rtx_REG (Pmode, REG_SP);
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  int e = sibcall_p ? -1 : 1;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  bool all = lookup_attribute ("saveall", attrs) != NULL_TREE;

  if (fkind != SUBROUTINE)
    {
      expand_interrupt_handler_epilogue (spreg, fkind, all);
      return;
    }

  do_unlink (spreg, get_frame_size (), all, e);

  expand_epilogue_reg_restore (spreg, all, false);

  /* Omit the return insn if this is for a sibcall.  */
  if (! need_return)
    return;

  if (eh_return)
    emit_insn (gen_addsi3 (spreg, spreg, gen_rtx_REG (Pmode, REG_P2)));

  emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, REG_RETS)));
}

/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */

int
bfin_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
			   unsigned int new_reg)
{
  /* Interrupt functions can only use registers that have already been
     saved by the prologue, even if they would normally be
     call-clobbered.  */

  if (funkind (TREE_TYPE (current_function_decl)) != SUBROUTINE
      && !df_regs_ever_live_p (new_reg))
    return 0;

  return 1;
}

/* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  */
static void
bfin_extra_live_on_entry (bitmap regs)
{
  if (TARGET_FDPIC)
    bitmap_set_bit (regs, FDPIC_REGNO);
}

/* Return the value of the return address for the frame COUNT steps up
   from the current frame, after the prologue.
   We punt for everything but the current frame by returning const0_rtx.  */

rtx
bfin_return_addr_rtx (int count)
{
  if (count != 0)
    return const0_rtx;

  return get_hard_reg_initial_val (Pmode, REG_RETS);
}

static rtx
bfin_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x;

  if (GET_CODE (x) != MEM)
    return orig_x;

  x = XEXP (x, 0);
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_MOVE_PIC
      && GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    return XVECEXP (XEXP (x, 1), 0, 0);

  return orig_x;
}

/* This predicate is used to compute the length of a load/store insn.
   OP is a MEM rtx, we return nonzero if its addressing mode requires a
   32-bit instruction.  */

int
effective_address_32bit_p (rtx op, machine_mode mode)
{
  HOST_WIDE_INT offset;

  mode = GET_MODE (op);
  op = XEXP (op, 0);

  if (GET_CODE (op) != PLUS)
    {
      gcc_assert (REG_P (op) || GET_CODE (op) == POST_INC
		  || GET_CODE (op) == PRE_DEC || GET_CODE (op) == POST_DEC);
      return 0;
    }

  if (GET_CODE (XEXP (op, 1)) == UNSPEC)
    return 1;

  offset = INTVAL (XEXP (op, 1));

  /* All byte loads use a 16-bit offset.  */
  if (GET_MODE_SIZE (mode) == 1)
    return 1;

  if (GET_MODE_SIZE (mode) == 4)
    {
      /* Frame pointer relative loads can use a negative offset, all others
	 are restricted to a small positive one.  */
      if (XEXP (op, 0) == frame_pointer_rtx)
	return offset < -128 || offset > 60;
      return offset < 0 || offset > 60;
    }

  /* Must be HImode now.  */
  return offset < 0 || offset > 30;
}

/* Returns true if X is a memory reference using an I register.  */
bool
bfin_dsp_memref_p (rtx x)
{
  if (! MEM_P (x))
    return false;
  x = XEXP (x, 0);
  if (GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_INC
      || GET_CODE (x) == POST_DEC || GET_CODE (x) == PRE_DEC)
    x = XEXP (x, 0);
  return IREG_P (x);
}

/* Return cost of the memory address ADDR.
   All addressing modes are equally cheap on the Blackfin.  */

static int
bfin_address_cost (rtx addr ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  return 1;
}

/* Subroutine of print_operand; used to print a memory reference X to FILE.  */

void
print_address_operand (FILE *file, rtx x)
{
  switch (GET_CODE (x))
    {
    case PLUS:
      output_address (XEXP (x, 0));
      fprintf (file, "+");
      output_address (XEXP (x, 1));
      break;

    case PRE_DEC:
      fprintf (file, "--");
      output_address (XEXP (x, 0));
      break;
    case POST_INC:
      output_address (XEXP (x, 0));
      fprintf (file, "++");
      break;
    case POST_DEC:
      output_address (XEXP (x, 0));
      fprintf (file, "--");
      break;

    default:
      gcc_assert (GET_CODE (x) != MEM);
      print_operand (file, x, 0);
      break;
    }
}
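
/* Illustrative note (not from the original source): a post-increment
   access such as (mem:SI (post_inc (reg P0))) comes out of the MEM case
   of print_operand below as

     [P0++]

   since print_operand supplies the surrounding brackets.  */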

/* intp DImode support, contributed by Tony:
   -- Q: low word
   -- R: high word  */

void
print_operand (FILE *file, rtx x, char code)
{
  machine_mode mode;

  if (code == '!')
    {
      if (GET_MODE (current_output_insn) == SImode)
	fprintf (file, " ||");
      else
	fprintf (file, ";");
      return;
    }

  mode = GET_MODE (x);

  switch (code)
    {
    case 'j':
      switch (GET_CODE (x))
	{
	case EQ:
	  fprintf (file, "e");
	  break;
	case NE:
	  fprintf (file, "ne");
	  break;
	case GT:
	  fprintf (file, "g");
	  break;
	case LT:
	  fprintf (file, "l");
	  break;
	case GE:
	  fprintf (file, "ge");
	  break;
	case LE:
	  fprintf (file, "le");
	  break;
	case GTU:
	  fprintf (file, "g");
	  break;
	case LTU:
	  fprintf (file, "l");
	  break;
	case GEU:
	  fprintf (file, "ge");
	  break;
	case LEU:
	  fprintf (file, "le");
	  break;
	default:
	  output_operand_lossage ("invalid %%j value");
	}
      break;

    case 'J':					 /* reverse logic */
      switch (GET_CODE (x))
	{
	case EQ:
	  fprintf (file, "ne");
	  break;
	case NE:
	  fprintf (file, "e");
	  break;
	case GT:
	  fprintf (file, "le");
	  break;
	case LT:
	  fprintf (file, "ge");
	  break;
	case GE:
	  fprintf (file, "l");
	  break;
	case LE:
	  fprintf (file, "g");
	  break;
	case GTU:
	  fprintf (file, "le");
	  break;
	case LTU:
	  fprintf (file, "ge");
	  break;
	case GEU:
	  fprintf (file, "l");
	  break;
	case LEU:
	  fprintf (file, "g");
	  break;
	default:
	  output_operand_lossage ("invalid %%J value");
	}
      break;

    default:
      switch (GET_CODE (x))
	{
	case REG:
	  if (code == 'h')
	    {
	      if (REGNO (x) < 32)
		fprintf (file, "%s", short_reg_names[REGNO (x)]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'd')
	    {
	      if (REGNO (x) < 32)
		fprintf (file, "%s", high_reg_names[REGNO (x)]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'w')
	    {
	      if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1)
		fprintf (file, "%s.w", reg_names[REGNO (x)]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'x')
	    {
	      if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1)
		fprintf (file, "%s.x", reg_names[REGNO (x)]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'v')
	    {
	      if (REGNO (x) == REG_A0)
		fprintf (file, "AV0");
	      else if (REGNO (x) == REG_A1)
		fprintf (file, "AV1");
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'D')
	    {
	      if (D_REGNO_P (REGNO (x)))
		fprintf (file, "%s", dregs_pair_names[REGNO (x)]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'H')
	    {
	      if ((mode == DImode || mode == DFmode) && REG_P (x))
		fprintf (file, "%s", reg_names[REGNO (x) + 1]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else if (code == 'T')
	    {
	      if (D_REGNO_P (REGNO (x)))
		fprintf (file, "%s", byte_reg_names[REGNO (x)]);
	      else
		output_operand_lossage ("invalid operand for code '%c'", code);
	    }
	  else
	    fprintf (file, "%s", reg_names[REGNO (x)]);
	  break;

	case MEM:
	  fputc ('[', file);
	  x = XEXP (x, 0);
	  print_address_operand (file, x);
	  fputc (']', file);
	  break;

	case CONST_INT:
	  if (code == 'M')
	    {
	      switch (INTVAL (x))
		{
		case MACFLAG_NONE:
		  break;
		case MACFLAG_FU:
		  fputs ("(FU)", file);
		  break;
		case MACFLAG_T:
		  fputs ("(T)", file);
		  break;
		case MACFLAG_TFU:
		  fputs ("(TFU)", file);
		  break;
		case MACFLAG_W32:
		  fputs ("(W32)", file);
		  break;
		case MACFLAG_IS:
		  fputs ("(IS)", file);
		  break;
		case MACFLAG_IU:
		  fputs ("(IU)", file);
		  break;
		case MACFLAG_IH:
		  fputs ("(IH)", file);
		  break;
		case MACFLAG_M:
		  fputs ("(M)", file);
		  break;
		case MACFLAG_IS_M:
		  fputs ("(IS,M)", file);
		  break;
		case MACFLAG_ISS2:
		  fputs ("(ISS2)", file);
		  break;
		case MACFLAG_S2RND:
		  fputs ("(S2RND)", file);
		  break;
		default:
		  gcc_unreachable ();
		}
	      break;
	    }
	  else if (code == 'b')
	    {
	      if (INTVAL (x) == 0)
		fputs ("+=", file);
	      else if (INTVAL (x) == 1)
		fputs ("-=", file);
	      else
		gcc_unreachable ();
	      break;
	    }
	  /* Moves to half registers with d or h modifiers always use unsigned
	     constants.  */
	  else if (code == 'd')
	    x = GEN_INT ((INTVAL (x) >> 16) & 0xffff);
	  else if (code == 'h')
	    x = GEN_INT (INTVAL (x) & 0xffff);
	  else if (code == 'N')
	    x = GEN_INT (-INTVAL (x));
	  else if (code == 'X')
	    x = GEN_INT (exact_log2 (0xffffffff & INTVAL (x)));
	  else if (code == 'Y')
	    x = GEN_INT (exact_log2 (0xffffffff & ~INTVAL (x)));
	  else if (code == 'Z')
	    /* Used for LINK insns.  */
	    x = GEN_INT (-8 - INTVAL (x));

	  /* fall through */

	case SYMBOL_REF:
	  output_addr_const (file, x);
	  break;

	case CONST_DOUBLE:
	  output_operand_lossage ("invalid const_double operand");
	  break;

	case UNSPEC:
	  switch (XINT (x, 1))
	    {
	    case UNSPEC_MOVE_PIC:
	      output_addr_const (file, XVECEXP (x, 0, 0));
	      fprintf (file, "@GOT");
	      break;

	    case UNSPEC_MOVE_FDPIC:
	      output_addr_const (file, XVECEXP (x, 0, 0));
	      fprintf (file, "@GOT17M4");
	      break;

	    case UNSPEC_FUNCDESC_GOT17M4:
	      output_addr_const (file, XVECEXP (x, 0, 0));
	      fprintf (file, "@FUNCDESC_GOT17M4");
	      break;

	    case UNSPEC_LIBRARY_OFFSET:
	      fprintf (file, "_current_shared_library_p5_offset_");
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  break;

	default:
	  output_addr_const (file, x);
	}
    }
}

/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.
   Per the VDSP C Compiler manual (our ABI), the first 3 words of
   arguments are passed in R0, R1 and R2.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
		      rtx libname ATTRIBUTE_UNUSED)
{
  static CUMULATIVE_ARGS zero_cum;

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */

  cum->nregs = max_arg_registers;
  cum->arg_regs = arg_regs;

  cum->call_cookie = CALL_NORMAL;
  /* Check for a shortcall or longcall attribute.  */
  if (fntype && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype)))
    cum->call_cookie |= CALL_SHORT;
  else if (fntype && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype)))
    cum->call_cookie |= CALL_LONG;

  return;
}
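
/* Worked example (illustrative): for a call to

     int f (int a, int b, int c, int d);

   the state set up above hands out R0, R1 and R2 for a, b and c, after
   which bfin_function_arg below returns NULL_RTX for d, so d is passed
   on the stack.  */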

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
bfin_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
			   const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int count, bytes, words;

  bytes = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  cum->words += words;
  cum->nregs -= words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->arg_regs = NULL;
    }
  else
    {
      for (count = 1; count <= words; count++)
        cum->arg_regs++;
    }

  return;
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
bfin_function_arg (cumulative_args_t cum_v, machine_mode mode,
		   const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);

  if (mode == VOIDmode)
    /* Compute operand 2 of the call insn.  */
    return GEN_INT (cum->call_cookie);

  if (bytes == -1)
    return NULL_RTX;

  if (cum->nregs)
    return gen_rtx_REG (mode, *(cum->arg_regs));

  return NULL_RTX;
}

/* For an arg passed partly in registers and partly in memory,
   this is the number of bytes passed in registers.
   For args passed entirely in registers or entirely in memory, zero.

   Per the VDSP C Compiler manual (our ABI), the first 3 words are
   passed in registers, so an argument larger than the remaining
   registers spans both registers and the stack.  */

static int
bfin_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
			tree type ATTRIBUTE_UNUSED,
			bool named ATTRIBUTE_UNUSED)
{
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int bytes_left = get_cumulative_args (cum)->nregs * UNITS_PER_WORD;

  if (bytes == -1)
    return 0;

  if (bytes_left == 0)
    return 0;
  if (bytes > bytes_left)
    return bytes_left;
  return 0;
}
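
/* Worked example (illustrative): once two int arguments have consumed
   R0 and R1, bytes_left is 4, so an 8-byte argument returns 4 here: its
   first word travels in R2 and the second goes on the stack.  */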

/* Variable sized types are passed by reference.  */

static bool
bfin_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
			machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}

/* Decide whether a type should be returned in memory (true)
   or in a register (false).  This is called by the macro
   TARGET_RETURN_IN_MEMORY.  */

static bool
bfin_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  int size = int_size_in_bytes (type);
  return size > 2 * UNITS_PER_WORD || size == -1;
}

/* Register in which address to store a structure value
   is passed to a function.  */
static rtx
bfin_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		      int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, REG_P0);
}

/* Return true when register may be used to pass function parameters.  */

bool
function_arg_regno_p (int n)
{
  int i;
  for (i = 0; arg_regs[i] != -1; i++)
    if (n == arg_regs[i])
      return true;
  return false;
}

/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (rtx op)
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

1850 /* Decide whether we can make a sibling call to a function.  DECL is the
1851    declaration of the function being targeted by the call and EXP is the
1852    CALL_EXPR representing the call.  */
1853 
1854 static bool
1855 bfin_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1856 			      tree exp ATTRIBUTE_UNUSED)
1857 {
1858   struct cgraph_local_info *this_func, *called_func;
1859   e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
1860   if (fkind != SUBROUTINE)
1861     return false;
1862   if (!TARGET_ID_SHARED_LIBRARY || TARGET_SEP_DATA)
1863     return true;
1864 
1865   /* When compiling for ID shared libraries, we can't sibcall a local function
1866      from a non-local function, because the local function thinks it does
1867      not need to reload P5 in the prologue, but the sibcall will pop P5 in the
1868      sibcall epilogue, and we end up with the wrong value in P5.  */
1869 
1870   if (!decl)
1871     /* Not enough information.  */
1872     return false;
1873 
1874   this_func = cgraph_node::local_info (current_function_decl);
1875   called_func = cgraph_node::local_info (decl);
1876   if (!called_func)
1877     return false;
1878   return !called_func->local || this_func->local;
1879 }
1880 
1881 /* Write a template for a trampoline to F.  */
1882 
1883 static void
1884 bfin_asm_trampoline_template (FILE *f)
1885 {
1886   if (TARGET_FDPIC)
1887     {
1888       fprintf (f, "\t.dd\t0x00000000\n");	/* 0 */
1889       fprintf (f, "\t.dd\t0x00000000\n");	/* 0 */
1890       fprintf (f, "\t.dd\t0x0000e109\n");	/* p1.l = fn low */
1891       fprintf (f, "\t.dd\t0x0000e149\n");	/* p1.h = fn high */
1892       fprintf (f, "\t.dd\t0x0000e10a\n");	/* p2.l = sc low */
1893       fprintf (f, "\t.dd\t0x0000e14a\n");	/* p2.h = sc high */
1894       fprintf (f, "\t.dw\t0xac4b\n");		/* p3 = [p1 + 4] */
1895       fprintf (f, "\t.dw\t0x9149\n");		/* p1 = [p1] */
1896       fprintf (f, "\t.dw\t0x0051\n");		/* jump (p1)*/
1897     }
1898   else
1899     {
1900       fprintf (f, "\t.dd\t0x0000e109\n");	/* p1.l = fn low */
1901       fprintf (f, "\t.dd\t0x0000e149\n");	/* p1.h = fn high */
1902       fprintf (f, "\t.dd\t0x0000e10a\n");	/* p2.l = sc low */
1903       fprintf (f, "\t.dd\t0x0000e14a\n");	/* p2.h = sc high */
1904       fprintf (f, "\t.dw\t0x0051\n");		/* jump (p1)*/
1905     }
1906 }
1907 
1908 /* Emit RTL insns to initialize the variable parts of a trampoline at
1909    M_TRAMP. FNDECL is the target function.  CHAIN_VALUE is an RTX for
1910    the static chain value for the function.  */
1911 
1912 static void
1913 bfin_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
1914 {
1915   rtx t1 = copy_to_reg (XEXP (DECL_RTL (fndecl), 0));
1916   rtx t2 = copy_to_reg (chain_value);
1917   rtx mem;
1918   int i = 0;
1919 
1920   emit_block_move (m_tramp, assemble_trampoline_template (),
1921 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
1922 
1923   if (TARGET_FDPIC)
1924     {
1925       rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (m_tramp, 0), 8));
1926       mem = adjust_address (m_tramp, Pmode, 0);
1927       emit_move_insn (mem, a);
1928       i = 8;
1929     }
1930 
1931   mem = adjust_address (m_tramp, HImode, i + 2);
1932   emit_move_insn (mem, gen_lowpart (HImode, t1));
1933   emit_insn (gen_ashrsi3 (t1, t1, GEN_INT (16)));
1934   mem = adjust_address (m_tramp, HImode, i + 6);
1935   emit_move_insn (mem, gen_lowpart (HImode, t1));
1936 
1937   mem = adjust_address (m_tramp, HImode, i + 10);
1938   emit_move_insn (mem, gen_lowpart (HImode, t2));
1939   emit_insn (gen_ashrsi3 (t2, t2, GEN_INT (16)));
1940   mem = adjust_address (m_tramp, HImode, i + 14);
1941   emit_move_insn (mem, gen_lowpart (HImode, t2));
1942 }
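
/* To illustrate the patching: each ".dd" word of the template holds a
   16-bit opcode in its low half and a 16-bit immediate in its high
   half, so the halfwords at byte offsets i+2 and i+6 receive the low
   and high parts of the function address (loaded into P1), and those
   at i+10 and i+14 the static chain value (loaded into P2); for FDPIC,
   i is 8 because the leading eight bytes hold the function descriptor,
   whose first word is initialized here.  */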
1943 
1944 /* Emit insns to move operands[1] into operands[0].  */
1945 
1946 void
1947 emit_pic_move (rtx *operands, machine_mode mode ATTRIBUTE_UNUSED)
1948 {
1949   rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
1950 
1951   gcc_assert (!TARGET_FDPIC || !(reload_in_progress || reload_completed));
1952   if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
1953     operands[1] = force_reg (SImode, operands[1]);
1954   else
1955     operands[1] = legitimize_pic_address (operands[1], temp,
1956 					  TARGET_FDPIC ? OUR_FDPIC_REG
1957 					  : pic_offset_table_rtx);
1958 }
1959 
1960 /* Expand a move operation in mode MODE.  The operands are in OPERANDS.
1961    Returns true if no further code must be generated, false if the caller
1962    should generate an insn to move OPERANDS[1] to OPERANDS[0].  */
1963 
1964 bool
1965 expand_move (rtx *operands, machine_mode mode)
1966 {
1967   rtx op = operands[1];
1968   if ((TARGET_ID_SHARED_LIBRARY || TARGET_FDPIC)
1969       && SYMBOLIC_CONST (op))
1970     emit_pic_move (operands, mode);
1971   else if (mode == SImode && GET_CODE (op) == CONST
1972 	   && GET_CODE (XEXP (op, 0)) == PLUS
1973 	   && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
1974 	   && !targetm.legitimate_constant_p (mode, op))
1975     {
1976       rtx dest = operands[0];
1977       rtx op0, op1;
1978       gcc_assert (!reload_in_progress && !reload_completed);
1979       op = XEXP (op, 0);
1980       op0 = force_reg (mode, XEXP (op, 0));
1981       op1 = XEXP (op, 1);
1982       if (!insn_data[CODE_FOR_addsi3].operand[2].predicate (op1, mode))
1983 	op1 = force_reg (mode, op1);
1984       if (GET_CODE (dest) == MEM)
1985 	dest = gen_reg_rtx (mode);
1986       emit_insn (gen_addsi3 (dest, op0, op1));
1987       if (dest == operands[0])
1988 	return true;
1989       operands[1] = dest;
1990     }
1991   /* Don't generate memory->memory or constant->memory moves; go through
1992      a register instead.  */
1993   else if ((reload_in_progress | reload_completed) == 0
1994 	   && GET_CODE (operands[0]) == MEM
1995     	   && GET_CODE (operands[1]) != REG)
1996     operands[1] = force_reg (mode, operands[1]);
1997   return false;
1998 }
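
/* A sketch of the CONST case above: for an SImode move of
   (const (plus (symbol_ref "x") (const_int N))) that is not a
   legitimate constant, the symbol part is forced into a register and
   an addsi3 adds N (itself forced into a register if it does not
   satisfy the add's operand predicate); a MEM destination is replaced
   by a fresh pseudo first, leaving the final store to the caller.  */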
1999 
2000 /* Split one or more DImode RTL references into pairs of SImode
2001    references.  The RTL can be REG, offsettable MEM, integer constant, or
2002    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
2003    split and "num" is its length.  lo_half and hi_half are output arrays
2004    that parallel "operands".  */
2005 
2006 void
2007 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2008 {
2009   while (num--)
2010     {
2011       rtx op = operands[num];
2012 
2013       /* simplify_subreg refuses to split volatile memory references,
2014          but we still have to handle them.  */
2015       if (GET_CODE (op) == MEM)
2016 	{
2017 	  lo_half[num] = adjust_address (op, SImode, 0);
2018 	  hi_half[num] = adjust_address (op, SImode, 4);
2019 	}
2020       else
2021 	{
2022 	  lo_half[num] = simplify_gen_subreg (SImode, op,
2023 					      GET_MODE (op) == VOIDmode
2024 					      ? DImode : GET_MODE (op), 0);
2025 	  hi_half[num] = simplify_gen_subreg (SImode, op,
2026 					      GET_MODE (op) == VOIDmode
2027 					      ? DImode : GET_MODE (op), 4);
2028 	}
2029     }
2030 }
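
/* Example: the DImode constant 0x100000002 splits into (const_int 2)
   for the low half and (const_int 1) for the high half on this
   little-endian port; a DImode register becomes its two SImode
   subregs, and a volatile MEM is handled with adjust_address as noted
   above.  */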
2031 
2032 bool
2033 bfin_longcall_p (rtx op, int call_cookie)
2034 {
2035   gcc_assert (GET_CODE (op) == SYMBOL_REF);
2036   if (SYMBOL_REF_WEAK (op))
2037     return 1;
2038   if (call_cookie & CALL_SHORT)
2039     return 0;
2040   if (call_cookie & CALL_LONG)
2041     return 1;
2042   if (TARGET_LONG_CALLS)
2043     return 1;
2044   return 0;
2045 }
2046 
2047 /* Expand a call instruction.  FNADDR is the call target, RETVAL the return
2048    value.  COOKIE is a CONST_INT holding the call_cookie prepared by
2049    init_cumulative_args.  SIBCALL is nonzero if this is a sibling call.  */
2050 
2051 void
2052 bfin_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx cookie, int sibcall)
2053 {
2054   rtx use = NULL, call;
2055   rtx callee = XEXP (fnaddr, 0);
2056   int nelts = 3;
2057   rtx pat;
2058   rtx picreg = get_hard_reg_initial_val (SImode, FDPIC_REGNO);
2059   rtx retsreg = gen_rtx_REG (Pmode, REG_RETS);
2060   int n;
2061 
2062   /* In an untyped call, we can get NULL for operand 2.  */
2063   if (cookie == NULL_RTX)
2064     cookie = const0_rtx;
2065 
2066   /* Static functions and indirect calls don't need the pic register.  */
2067   if (!TARGET_FDPIC && flag_pic
2068       && GET_CODE (callee) == SYMBOL_REF
2069       && !SYMBOL_REF_LOCAL_P (callee))
2070     use_reg (&use, pic_offset_table_rtx);
2071 
2072   if (TARGET_FDPIC)
2073     {
2074       int caller_in_sram, callee_in_sram;
2075 
2076       /* 0 is not in sram, 1 is in L1 sram, 2 is in L2 sram.  */
2077       caller_in_sram = callee_in_sram = 0;
2078 
2079       if (lookup_attribute ("l1_text",
2080 			    DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE)
2081 	caller_in_sram = 1;
2082       else if (lookup_attribute ("l2",
2083 				 DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE)
2084 	caller_in_sram = 2;
2085 
2086       if (GET_CODE (callee) == SYMBOL_REF
2087 	  && SYMBOL_REF_DECL (callee) && DECL_P (SYMBOL_REF_DECL (callee)))
2088 	{
2089 	  if (lookup_attribute
2090 	      ("l1_text",
2091 	       DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE)
2092 	    callee_in_sram = 1;
2093 	  else if (lookup_attribute
2094 		   ("l2",
2095 		    DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE)
2096 	    callee_in_sram = 2;
2097 	}
2098 
2099       if (GET_CODE (callee) != SYMBOL_REF
2100 	  || bfin_longcall_p (callee, INTVAL (cookie))
2101 	  || (GET_CODE (callee) == SYMBOL_REF
2102 	      && !SYMBOL_REF_LOCAL_P (callee)
2103 	      && TARGET_INLINE_PLT)
2104 	  || caller_in_sram != callee_in_sram
2105 	  || (caller_in_sram && callee_in_sram
2106 	      && (GET_CODE (callee) != SYMBOL_REF
2107 		  || !SYMBOL_REF_LOCAL_P (callee))))
2108 	{
2109 	  rtx addr = callee;
2110 	  if (! address_operand (addr, Pmode))
2111 	    addr = force_reg (Pmode, addr);
2112 
2113 	  fnaddr = gen_reg_rtx (SImode);
2114 	  emit_insn (gen_load_funcdescsi (fnaddr, addr));
2115 	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
2116 
2117 	  picreg = gen_reg_rtx (SImode);
2118 	  emit_insn (gen_load_funcdescsi (picreg,
2119 					  plus_constant (Pmode, addr, 4)));
2120 	}
2121 
2122       nelts++;
2123     }
2124   else if ((!register_no_elim_operand (callee, Pmode)
2125 	    && GET_CODE (callee) != SYMBOL_REF)
2126 	   || (GET_CODE (callee) == SYMBOL_REF
2127 	       && ((TARGET_ID_SHARED_LIBRARY && !TARGET_LEAF_ID_SHARED_LIBRARY)
2128 		   || bfin_longcall_p (callee, INTVAL (cookie)))))
2129     {
2130       callee = copy_to_mode_reg (Pmode, callee);
2131       fnaddr = gen_rtx_MEM (Pmode, callee);
2132     }
2133   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
2134 
2135   if (retval)
2136     call = gen_rtx_SET (VOIDmode, retval, call);
2137 
2138   pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nelts));
2139   n = 0;
2140   XVECEXP (pat, 0, n++) = call;
2141   if (TARGET_FDPIC)
2142     XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, picreg);
2143   XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, cookie);
2144   if (sibcall)
2145     XVECEXP (pat, 0, n++) = ret_rtx;
2146   else
2147     XVECEXP (pat, 0, n++) = gen_rtx_CLOBBER (VOIDmode, retsreg);
2148   call = emit_call_insn (pat);
2149   if (use)
2150     CALL_INSN_FUNCTION_USAGE (call) = use;
2151 }
2152 
2153 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
2154 
2155 int
2156 hard_regno_mode_ok (int regno, machine_mode mode)
2157 {
2158   /* Allow only dregs to store values of mode HI or QI.  */
2159   enum reg_class rclass = REGNO_REG_CLASS (regno);
2160 
2161   if (mode == CCmode)
2162     return 0;
2163 
2164   if (mode == V2HImode)
2165     return D_REGNO_P (regno);
2166   if (rclass == CCREGS)
2167     return mode == BImode;
2168   if (mode == PDImode || mode == V2PDImode)
2169     return regno == REG_A0 || regno == REG_A1;
2170 
2171   /* Allow all normal 32-bit regs, except REG_M3, in case regclass ever comes
2172      up with a bad register class (such as ALL_REGS) for DImode.  */
2173   if (mode == DImode)
2174     return regno < REG_M3;
2175 
2176   if (mode == SImode
2177       && TEST_HARD_REG_BIT (reg_class_contents[PROLOGUE_REGS], regno))
2178     return 1;
2179 
2180   return TEST_HARD_REG_BIT (reg_class_contents[MOST_REGS], regno);
2181 }
2182 
2183 /* Implements target hook vector_mode_supported_p.  */
2184 
2185 static bool
2186 bfin_vector_mode_supported_p (machine_mode mode)
2187 {
2188   return mode == V2HImode;
2189 }
2190 
2191 /* Worker function for TARGET_REGISTER_MOVE_COST.  */
2192 
2193 static int
2194 bfin_register_move_cost (machine_mode mode,
2195 			 reg_class_t class1, reg_class_t class2)
2196 {
2197   /* These need secondary reloads, so they're more expensive.  */
2198   if ((class1 == CCREGS && !reg_class_subset_p (class2, DREGS))
2199       || (class2 == CCREGS && !reg_class_subset_p (class1, DREGS)))
2200     return 4;
2201 
2202   /* If optimizing for size, always prefer reg-reg over reg-memory moves.  */
2203   if (optimize_size)
2204     return 2;
2205 
2206   if (GET_MODE_CLASS (mode) == MODE_INT)
2207     {
2208       /* Discourage trying to use the accumulators.  */
2209       if (TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A0)
2210 	  || TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A1)
2211 	  || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A0)
2212 	  || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A1))
2213 	return 20;
2214     }
2215   return 2;
2216 }
2217 
2218 /* Worker function for TARGET_MEMORY_MOVE_COST.
2219 
2220    ??? In theory L1 memory has single-cycle latency.  We should add a switch
2221    that tells the compiler whether we expect to use only L1 memory for the
2222    program; it'll make the costs more accurate.  */
2223 
2224 static int
2225 bfin_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
2226 		       reg_class_t rclass,
2227 		       bool in ATTRIBUTE_UNUSED)
2228 {
2229   /* Make memory accesses slightly more expensive than any register-register
2230      move.  Also, penalize non-DP registers, since they need secondary
2231      reloads to load and store.  */
2232   if (! reg_class_subset_p (rclass, DPREGS))
2233     return 10;
2234 
2235   return 8;
2236 }
2237 
2238 /* Inform reload about cases where moving X with a mode MODE to a register in
2239    RCLASS requires an extra scratch register.  Return the class needed for the
2240    scratch register.  */
2241 
2242 static reg_class_t
2243 bfin_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
2244 		       machine_mode mode, secondary_reload_info *sri)
2245 {
2246   /* If we have HImode or QImode, we can only use DREGS as secondary registers;
2247      in most other cases we can also use PREGS.  */
2248   enum reg_class default_class = GET_MODE_SIZE (mode) >= 4 ? DPREGS : DREGS;
2249   enum reg_class x_class = NO_REGS;
2250   enum rtx_code code = GET_CODE (x);
2251   enum reg_class rclass = (enum reg_class) rclass_i;
2252 
2253   if (code == SUBREG)
2254     x = SUBREG_REG (x), code = GET_CODE (x);
2255   if (REG_P (x))
2256     {
2257       int regno = REGNO (x);
2258       if (regno >= FIRST_PSEUDO_REGISTER)
2259 	regno = reg_renumber[regno];
2260 
2261       if (regno == -1)
2262 	code = MEM;
2263       else
2264 	x_class = REGNO_REG_CLASS (regno);
2265     }
2266 
2267   /* We can be asked to reload (plus (FP) (large_constant)) into a DREG.
2268      This happens as a side effect of register elimination, and we need
2269      a scratch register to do it.  */
2270   if (fp_plus_const_operand (x, mode))
2271     {
2272       rtx op2 = XEXP (x, 1);
2273       int large_constant_p = ! satisfies_constraint_Ks7 (op2);
2274 
2275       if (rclass == PREGS || rclass == PREGS_CLOBBERED)
2276 	return NO_REGS;
2277       /* If destination is a DREG, we can do this without a scratch register
2278 	 if the constant is valid for an add instruction.  */
2279       if ((rclass == DREGS || rclass == DPREGS)
2280 	  && ! large_constant_p)
2281 	return NO_REGS;
2282       /* Reloading to anything other than a DREG?  Use a PREG scratch
2283 	 register.  */
2284       sri->icode = CODE_FOR_reload_insi;
2285       return NO_REGS;
2286     }
2287 
2288   /* Data can usually be moved freely between registers of most classes.
2289      AREGS are an exception; they can only move to or from another register
2290      in AREGS or one in DREGS.  They can also be assigned the constant 0.  */
2291   if (x_class == AREGS || x_class == EVEN_AREGS || x_class == ODD_AREGS)
2292     return (rclass == DREGS || rclass == AREGS || rclass == EVEN_AREGS
2293 	    || rclass == ODD_AREGS
2294 	    ? NO_REGS : DREGS);
2295 
2296   if (rclass == AREGS || rclass == EVEN_AREGS || rclass == ODD_AREGS)
2297     {
2298       if (code == MEM)
2299 	{
2300 	  sri->icode = in_p ? CODE_FOR_reload_inpdi : CODE_FOR_reload_outpdi;
2301 	  return NO_REGS;
2302 	}
2303 
2304       if (x != const0_rtx && x_class != DREGS)
2305 	{
2306 	  return DREGS;
2307 	}
2308       else
2309 	return NO_REGS;
2310     }
2311 
2312   /* CCREGS can only be moved from/to DREGS.  */
2313   if (rclass == CCREGS && x_class != DREGS)
2314     return DREGS;
2315   if (x_class == CCREGS && rclass != DREGS)
2316     return DREGS;
2317 
2318   /* All registers other than AREGS can load arbitrary constants.  The only
2319      case that remains is MEM.  */
2320   if (code == MEM)
2321     if (! reg_class_subset_p (rclass, default_class))
2322       return default_class;
2323 
2324   return NO_REGS;
2325 }
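
/* Two concrete cases of the above: a PDImode accumulator loaded from
   or stored to memory goes through the reload_{in,out}pdi patterns,
   and a move between CCREGS and anything but a D register gets DREGS
   as the scratch class.  */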
2326 
2327 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */
2328 
2329 static bool
2330 bfin_class_likely_spilled_p (reg_class_t rclass)
2331 {
2332   switch (rclass)
2333     {
2334       case PREGS_CLOBBERED:
2335       case PROLOGUE_REGS:
2336       case P0REGS:
2337       case D0REGS:
2338       case D1REGS:
2339       case D2REGS:
2340       case CCREGS:
2341         return true;
2342 
2343       default:
2344         break;
2345     }
2346 
2347   return false;
2348 }
2349 
2350 static struct machine_function *
2351 bfin_init_machine_status (void)
2352 {
2353   return ggc_cleared_alloc<machine_function> ();
2354 }
2355 
2356 /* Implement the TARGET_OPTION_OVERRIDE hook.  */
2357 
2358 static void
2359 bfin_option_override (void)
2360 {
2361   /* If processor type is not specified, enable all workarounds.  */
2362   if (bfin_cpu_type == BFIN_CPU_UNKNOWN)
2363     {
2364       int i;
2365 
2366       for (i = 0; bfin_cpus[i].name != NULL; i++)
2367 	bfin_workarounds |= bfin_cpus[i].workarounds;
2368 
2369       bfin_si_revision = 0xffff;
2370     }
2371 
2372   if (bfin_csync_anomaly == 1)
2373     bfin_workarounds |= WA_SPECULATIVE_SYNCS;
2374   else if (bfin_csync_anomaly == 0)
2375     bfin_workarounds &= ~WA_SPECULATIVE_SYNCS;
2376 
2377   if (bfin_specld_anomaly == 1)
2378     bfin_workarounds |= WA_SPECULATIVE_LOADS;
2379   else if (bfin_specld_anomaly == 0)
2380     bfin_workarounds &= ~WA_SPECULATIVE_LOADS;
2381 
2382   if (TARGET_OMIT_LEAF_FRAME_POINTER)
2383     flag_omit_frame_pointer = 1;
2384 
2385 #ifdef SUBTARGET_FDPIC_NOT_SUPPORTED
2386   if (TARGET_FDPIC)
2387     error ("-mfdpic is not supported, please use a bfin-linux-uclibc target");
2388 #endif
2389 
2390   /* Library identification */
2391   if (global_options_set.x_bfin_library_id && ! TARGET_ID_SHARED_LIBRARY)
2392     error ("-mshared-library-id= specified without -mid-shared-library");
2393 
2394   if (stack_limit_rtx && TARGET_FDPIC)
2395     {
2396       warning (0, "-fstack-limit- options are ignored with -mfdpic; use -mstack-check-l1");
2397       stack_limit_rtx = NULL_RTX;
2398     }
2399 
2400   if (stack_limit_rtx && TARGET_STACK_CHECK_L1)
2401     error ("can%'t use multiple stack checking methods together");
2402 
2403   if (TARGET_ID_SHARED_LIBRARY && TARGET_FDPIC)
2404     error ("ID shared libraries and FD-PIC mode can%'t be used together");
2405 
2406   /* Don't allow the user to specify -mid-shared-library and -msep-data
2407      together, as it makes little sense from a user's point of view...  */
2408   if (TARGET_SEP_DATA && TARGET_ID_SHARED_LIBRARY)
2409     error ("cannot specify both -msep-data and -mid-shared-library");
2410   /* ... internally, however, it's nearly the same.  */
2411   if (TARGET_SEP_DATA)
2412     target_flags |= MASK_ID_SHARED_LIBRARY | MASK_LEAF_ID_SHARED_LIBRARY;
2413 
2414   if (TARGET_ID_SHARED_LIBRARY && flag_pic == 0)
2415     flag_pic = 1;
2416 
2417   /* There is no single unaligned SI op for PIC code.  Sometimes we
2418      need to use ".4byte" and sometimes we need to use ".picptr".
2419      See bfin_assemble_integer for details.  */
2420   if (TARGET_FDPIC)
2421     targetm.asm_out.unaligned_op.si = 0;
2422 
2423   /* Silently turn off flag_pic if not doing FDPIC or ID shared libraries,
2424      since we don't support it and it'll just break.  */
2425   if (flag_pic && !TARGET_FDPIC && !TARGET_ID_SHARED_LIBRARY)
2426     flag_pic = 0;
2427 
2428   if (TARGET_MULTICORE && bfin_cpu_type != BFIN_CPU_BF561)
2429     error ("-mmulticore can only be used with BF561");
2430 
2431   if (TARGET_COREA && !TARGET_MULTICORE)
2432     error ("-mcorea should be used with -mmulticore");
2433 
2434   if (TARGET_COREB && !TARGET_MULTICORE)
2435     error ("-mcoreb should be used with -mmulticore");
2436 
2437   if (TARGET_COREA && TARGET_COREB)
2438     error ("-mcorea and -mcoreb can%'t be used together");
2439 
2440   flag_schedule_insns = 0;
2441 
2442   init_machine_status = bfin_init_machine_status;
2443 }
2444 
2445 /* Return the destination address of BRANCH.
2446    We need to use this instead of get_attr_length, because the
2447    cbranch_with_nops pattern conservatively sets its length to 6, and
2448    we still prefer to use shorter sequences.  */
2449 
2450 static int
2451 branch_dest (rtx_insn *branch)
2452 {
2453   rtx dest;
2454   int dest_uid;
2455   rtx pat = PATTERN (branch);
2456   if (GET_CODE (pat) == PARALLEL)
2457     pat = XVECEXP (pat, 0, 0);
2458   dest = SET_SRC (pat);
2459   if (GET_CODE (dest) == IF_THEN_ELSE)
2460     dest = XEXP (dest, 1);
2461   dest = XEXP (dest, 0);
2462   dest_uid = INSN_UID (dest);
2463   return INSN_ADDRESSES (dest_uid);
2464 }
2465 
2466 /* Return nonzero if INSN is annotated with a REG_BR_PROB note that indicates
2467    it's a branch that's predicted taken.  */
2468 
2469 static int
2470 cbranch_predicted_taken_p (rtx insn)
2471 {
2472   rtx x = find_reg_note (insn, REG_BR_PROB, 0);
2473 
2474   if (x)
2475     {
2476       int pred_val = XINT (x, 0);
2477 
2478       return pred_val >= REG_BR_PROB_BASE / 2;
2479     }
2480 
2481   return 0;
2482 }
2483 
2484 /* Templates for use by asm_conditional_branch.  */
2485 
2486 static const char *ccbranch_templates[][3] = {
2487   { "if !cc jump %3;",  "if cc jump 4 (bp); jump.s %3;",  "if cc jump 6 (bp); jump.l %3;" },
2488   { "if cc jump %3;",   "if !cc jump 4 (bp); jump.s %3;", "if !cc jump 6 (bp); jump.l %3;" },
2489   { "if !cc jump %3 (bp);",  "if cc jump 4; jump.s %3;",  "if cc jump 6; jump.l %3;" },
2490   { "if cc jump %3 (bp);",  "if !cc jump 4; jump.s %3;",  "if !cc jump 6; jump.l %3;" },
2491 };
2492 
2493 /* Output INSN, which is a conditional branch instruction with operands
2494    OPERANDS.
2495 
2496    We deal with the various forms of conditional branches that can be generated
2497    by bfin_reorg to prevent the hardware from doing speculative loads, by
2498    - emitting a sufficient number of nops, if N_NOPS is nonzero, or
2499    - always emitting the branch as predicted taken, if PREDICT_TAKEN is true.
2500    Either of these is only necessary if the branch is short, otherwise the
2501    template we use ends in an unconditional jump which flushes the pipeline
2502    anyway.  */
2503 
2504 void
2505 asm_conditional_branch (rtx_insn *insn, rtx *operands, int n_nops, int predict_taken)
2506 {
2507   int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2508   /* Note: the offset for sequences like "if cc jump; jump.[sl] offset"
2509             is measured from the start of the "if cc" insn rather than
2510             from the jump, so the range for jump.s becomes (-4094, 4096)
2511             instead of (-4096, 4094).  */
2512   int len = (offset >= -1024 && offset <= 1022 ? 0
2513 	     : offset >= -4094 && offset <= 4096 ? 1
2514 	     : 2);
2515   int bp = predict_taken && len == 0 ? 1 : cbranch_predicted_taken_p (insn);
2516   int idx = (bp << 1) | (GET_CODE (operands[0]) == EQ ? BRF : BRT);
2517   output_asm_insn (ccbranch_templates[idx][len], operands);
2518   gcc_assert (n_nops == 0 || !bp);
2519   if (len == 0)
2520     while (n_nops-- > 0)
2521       output_asm_insn ("nop;", NULL);
2522 }
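
/* For example, a branch taken when CC is set, not predicted taken and
   whose target is about 2000 bytes away (beyond the short branch
   range, within jump.s range) selects len == 1 and emits

       if !cc jump 4 (bp); jump.s <target>;

   i.e. the condition is inverted around a longer-range jump.  */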
2523 
2524 /* Emit rtl for a comparison operation CMP in mode MODE.  Operands have been
2525    stored in bfin_compare_op0 and bfin_compare_op1 already.  */
2526 
2527 rtx
2528 bfin_gen_compare (rtx cmp, machine_mode mode ATTRIBUTE_UNUSED)
2529 {
2530   enum rtx_code code1, code2;
2531   rtx op0 = XEXP (cmp, 0), op1 = XEXP (cmp, 1);
2532   rtx tem = bfin_cc_rtx;
2533   enum rtx_code code = GET_CODE (cmp);
2534 
2535   /* If we have a BImode input, then we already have a compare result, and
2536      do not need to emit another comparison.  */
2537   if (GET_MODE (op0) == BImode)
2538     {
2539       gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
2540       tem = op0, code2 = code;
2541     }
2542   else
2543     {
2544       switch (code) {
2545 	/* bfin has these conditions */
2546       case EQ:
2547       case LT:
2548       case LE:
2549       case LEU:
2550       case LTU:
2551 	code1 = code;
2552 	code2 = NE;
2553 	break;
2554       default:
2555 	code1 = reverse_condition (code);
2556 	code2 = EQ;
2557 	break;
2558       }
2559       emit_insn (gen_rtx_SET (VOIDmode, tem,
2560 			      gen_rtx_fmt_ee (code1, BImode, op0, op1)));
2561     }
2562 
2563   return gen_rtx_fmt_ee (code2, BImode, tem, CONST0_RTX (BImode));
2564 }
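
/* Example: GT is not one of the directly supported conditions, so
   code1 becomes its reverse (LE) and code2 EQ: we emit CC = op0 <= op1
   and return an expression that tests CC against zero.  */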
2565 
2566 /* Return nonzero iff C has exactly one bit set when it is interpreted
2567    as a 32-bit constant.  */
2568 
2569 int
2570 log2constp (unsigned HOST_WIDE_INT c)
2571 {
2572   c &= 0xFFFFFFFF;
2573   return c != 0 && (c & (c-1)) == 0;
2574 }
2575 
2576 /* Returns the number of consecutive least significant zeros in the binary
2577    representation of *V.
2578    We modify *V to contain the original value arithmetically shifted right by
2579    the number of zeroes.  */
2580 
2581 static int
2582 shiftr_zero (HOST_WIDE_INT *v)
2583 {
2584   unsigned HOST_WIDE_INT tmp = *v;
2585   unsigned HOST_WIDE_INT sgn;
2586   int n = 0;
2587 
2588   if (tmp == 0)
2589     return 0;
2590 
2591   sgn = tmp & ((unsigned HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1));
2592   while ((tmp & 0x1) == 0 && n <= 32)
2593     {
2594       tmp = (tmp >> 1) | sgn;
2595       n++;
2596     }
2597   *v = tmp;
2598   return n;
2599 }
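
/* Examples: for *V == 0x500000 this returns 20 and leaves 5 in *V;
   for *V == -8 it returns 3 and leaves -1, because the sign bit is
   shifted back in at the top.  */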
2600 
2601 /* After reload, split the load of an immediate constant.  OPERANDS are the
2602    operands of the movsi_insn pattern which we are splitting.  We return
2603    nonzero if we emitted a sequence to load the constant, zero if we emitted
2604    nothing because we want to use the splitter's default sequence.  */
2605 
2606 int
2607 split_load_immediate (rtx operands[])
2608 {
2609   HOST_WIDE_INT val = INTVAL (operands[1]);
2610   HOST_WIDE_INT tmp;
2611   HOST_WIDE_INT shifted = val;
2612   HOST_WIDE_INT shifted_compl = ~val;
2613   int num_zero = shiftr_zero (&shifted);
2614   int num_compl_zero = shiftr_zero (&shifted_compl);
2615   unsigned int regno = REGNO (operands[0]);
2616 
2617   /* This case takes care of single-bit set/clear constants, which we could
2618      also implement with BITSET/BITCLR.  */
2619   if (num_zero
2620       && shifted >= -32768 && shifted < 65536
2621       && (D_REGNO_P (regno)
2622 	  || (regno >= REG_P0 && regno <= REG_P7 && num_zero <= 2)))
2623     {
2624       emit_insn (gen_movsi (operands[0], gen_int_mode (shifted, SImode)));
2625       emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (num_zero)));
2626       return 1;
2627     }
2628 
2629   tmp = val & 0xFFFF;
2630   tmp |= -(tmp & 0x8000);	/* Sign-extend the low 16 bits.  */
2631 
2632   /* If high word has one bit set or clear, try to use a bit operation.  */
2633   if (D_REGNO_P (regno))
2634     {
2635       if (log2constp (val & 0xFFFF0000))
2636 	{
2637 	  emit_insn (gen_movsi (operands[0], GEN_INT (val & 0xFFFF)));
2638 	  emit_insn (gen_iorsi3 (operands[0], operands[0],
2639 				 gen_int_mode (val & 0xFFFF0000, SImode)));
2640 	  return 1;
2641 	}
2642       else if (log2constp (~(val | 0xFFFF)) && (val & 0x8000) != 0)
2643 	{
2644 	  emit_insn (gen_movsi (operands[0], GEN_INT (tmp)));
2645 	  emit_insn (gen_andsi3 (operands[0], operands[0],
2646 				 gen_int_mode (val | 0xFFFF, SImode)));
	  return 1;
2647 	}
2648     }
2649 
2650   if (D_REGNO_P (regno))
2651     {
2652       if (tmp >= -64 && tmp <= 63)
2653 	{
2654 	  emit_insn (gen_movsi (operands[0], GEN_INT (tmp)));
2655 	  emit_insn (gen_movstricthi_high (operands[0],
2656 					   gen_int_mode (val & -65536,
2657 							 SImode)));
2658 	  return 1;
2659 	}
2660 
2661       if ((val & 0xFFFF0000) == 0)
2662 	{
2663 	  emit_insn (gen_movsi (operands[0], const0_rtx));
2664 	  emit_insn (gen_movsi_low (operands[0], operands[0], operands[1]));
2665 	  return 1;
2666 	}
2667 
2668       if ((val & 0xFFFF0000) == 0xFFFF0000)
2669 	{
2670 	  emit_insn (gen_movsi (operands[0], constm1_rtx));
2671 	  emit_insn (gen_movsi_low (operands[0], operands[0], operands[1]));
2672 	  return 1;
2673 	}
2674     }
2675 
2676   /* Need DREGs for the remaining case.  */
2677   if (regno > REG_R7)
2678     return 0;
2679 
2680   if (optimize_size
2681       && num_compl_zero && shifted_compl >= -64 && shifted_compl <= 63)
2682     {
2683       /* If optimizing for size, generate a sequence that has more instructions
2684 	 but is shorter.  */
2685       emit_insn (gen_movsi (operands[0], gen_int_mode (shifted_compl, SImode)));
2686       emit_insn (gen_ashlsi3 (operands[0], operands[0],
2687 			      GEN_INT (num_compl_zero)));
2688       emit_insn (gen_one_cmplsi2 (operands[0], operands[0]));
2689       return 1;
2690     }
2691   return 0;
2692 }
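
/* Example: loading 0x500000 into a D register (say R0) matches the
   first case (shifted == 5, num_zero == 20) and is emitted roughly as

       R0 = 5;
       R0 <<= 20;

   instead of separately loading the two 16-bit halves.  */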
2693 
2694 /* Return true if VALUE is a valid constant displacement in a memory
2695    address for a memory operand of mode MODE; return false if not.  */
2696 
2697 static bool
2698 bfin_valid_add (machine_mode mode, HOST_WIDE_INT value)
2699 {
2700   unsigned HOST_WIDE_INT v = value > 0 ? value : -value;
2701   int sz = GET_MODE_SIZE (mode);
2702   int shift = sz == 1 ? 0 : sz == 2 ? 1 : 2;
2703   /* The usual offsettable_memref machinery doesn't work so well for this
2704      port, so we deal with the problem here.  */
2705   if (value > 0 && sz == 8)
2706     v += 4;
2707   return (v & ~(0x7fff << shift)) == 0;
2708 }
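
/* So for SImode (shift == 2) a displacement must be a multiple of 4
   with magnitude below 0x20000: 0x1fffc is accepted, while 0x1fffe
   (misaligned) and 0x20000 (out of range) are rejected.  */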
2709 
2710 static bool
2711 bfin_valid_reg_p (unsigned int regno, int strict, machine_mode mode,
2712 		  enum rtx_code outer_code)
2713 {
2714   if (strict)
2715     return REGNO_OK_FOR_BASE_STRICT_P (regno, mode, outer_code, SCRATCH);
2716   else
2717     return REGNO_OK_FOR_BASE_NONSTRICT_P (regno, mode, outer_code, SCRATCH);
2718 }
2719 
2720 /* Recognize an RTL expression that is a valid memory address for an
2721    instruction.  The MODE argument is the machine mode for the MEM expression
2722    that wants to use this address.
2723 
2724    Blackfin addressing modes are as follows:
2725 
2726       [preg]
2727       [preg + imm16]
2728 
2729       B [ Preg + uimm15 ]
2730       W [ Preg + uimm16m2 ]
2731       [ Preg + uimm17m4 ]
2732 
2733       [preg++]
2734       [preg--]
2735       [--sp]
2736 */
2737 
2738 static bool
2739 bfin_legitimate_address_p (machine_mode mode, rtx x, bool strict)
2740 {
2741   switch (GET_CODE (x)) {
2742   case REG:
2743     if (bfin_valid_reg_p (REGNO (x), strict, mode, MEM))
2744       return true;
2745     break;
2746   case PLUS:
2747     if (REG_P (XEXP (x, 0))
2748 	&& bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PLUS)
2749 	&& ((GET_CODE (XEXP (x, 1)) == UNSPEC && mode == SImode)
2750 	    || (GET_CODE (XEXP (x, 1)) == CONST_INT
2751 		&& bfin_valid_add (mode, INTVAL (XEXP (x, 1))))))
2752       return true;
2753     break;
2754   case POST_INC:
2755   case POST_DEC:
2756     if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode)
2757 	&& REG_P (XEXP (x, 0))
2758 	&& bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, POST_INC))
2759       return true;
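    /* Fall through.  */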
2760   case PRE_DEC:
2761     if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode)
2762 	&& XEXP (x, 0) == stack_pointer_rtx
2763 	&& REG_P (XEXP (x, 0))
2764 	&& bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PRE_DEC))
2765       return true;
2766     break;
2767   default:
2768     break;
2769   }
2770   return false;
2771 }
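
/* Concretely, [P2], [P2 + 100], [P2++] and [--SP] are all accepted for
   SImode, while reg+reg and bare symbolic addresses never are; the
   UNSPEC form covers the GOT-style offsets generated for PIC.  */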
2772 
2773 /* Decide whether we can force certain constants to memory.  If we
2774    decide we can't, the caller should be able to cope with it in
2775    another way.  */
2776 
2777 static bool
2778 bfin_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED,
2779 			     rtx x ATTRIBUTE_UNUSED)
2780 {
2781   /* We have only one class of non-legitimate constants, and our movsi
2782      expander knows how to handle them.  Dropping these constants into the
2783      data section would only shift the problem - we'd still get relocs
2784      outside the object, in the data section rather than the text section.  */
2785   return true;
2786 }
2787 
2788 /* Ensure that for any constant of the form symbol + offset, the offset
2789    remains within the object.  Any other constants are ok.
2790    This ensures that flat binaries never have to deal with relocations
2791    crossing section boundaries.  */
2792 
2793 static bool
2794 bfin_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2795 {
2796   rtx sym;
2797   HOST_WIDE_INT offset;
2798 
2799   if (GET_CODE (x) != CONST)
2800     return true;
2801 
2802   x = XEXP (x, 0);
2803   gcc_assert (GET_CODE (x) == PLUS);
2804 
2805   sym = XEXP (x, 0);
2806   x = XEXP (x, 1);
2807   if (GET_CODE (sym) != SYMBOL_REF
2808       || GET_CODE (x) != CONST_INT)
2809     return true;
2810   offset = INTVAL (x);
2811 
2812   if (SYMBOL_REF_DECL (sym) == 0)
2813     return true;
2814   if (offset < 0
2815       || offset >= int_size_in_bytes (TREE_TYPE (SYMBOL_REF_DECL (sym))))
2816     return false;
2817 
2818   return true;
2819 }
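
/* For example, given "char buf[16]", the constant buf+8 points inside
   the object and is accepted, while buf+20 lies beyond its 16-byte
   size and is rejected, forcing it to be computed at run time.  */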
2820 
2821 static bool
2822 bfin_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
2823 		bool speed)
2824 {
2825   enum rtx_code code = (enum rtx_code) code_i;
2826   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
2827   int cost2 = COSTS_N_INSNS (1);
2828   rtx op0, op1;
2829 
2830   switch (code)
2831     {
2832     case CONST_INT:
2833       if (outer_code == SET || outer_code == PLUS)
2834         *total = satisfies_constraint_Ks7 (x) ? 0 : cost2;
2835       else if (outer_code == AND)
2836         *total = log2constp (~INTVAL (x)) ? 0 : cost2;
2837       else if (outer_code == LE || outer_code == LT || outer_code == EQ)
2838         *total = (INTVAL (x) >= -4 && INTVAL (x) <= 3) ? 0 : cost2;
2839       else if (outer_code == LEU || outer_code == LTU)
2840         *total = (INTVAL (x) >= 0 && INTVAL (x) <= 7) ? 0 : cost2;
2841       else if (outer_code == MULT)
2842         *total = (INTVAL (x) == 2 || INTVAL (x) == 4) ? 0 : cost2;
2843       else if (outer_code == ASHIFT && (INTVAL (x) == 1 || INTVAL (x) == 2))
2844         *total = 0;
2845       else if (outer_code == ASHIFT || outer_code == ASHIFTRT
2846 	       || outer_code == LSHIFTRT)
2847         *total = (INTVAL (x) >= 0 && INTVAL (x) <= 31) ? 0 : cost2;
2848       else if (outer_code == IOR || outer_code == XOR)
2849         *total = (INTVAL (x) & (INTVAL (x) - 1)) == 0 ? 0 : cost2;
2850       else
2851 	*total = cost2;
2852       return true;
2853 
2854     case CONST:
2855     case LABEL_REF:
2856     case SYMBOL_REF:
2857     case CONST_DOUBLE:
2858       *total = COSTS_N_INSNS (2);
2859       return true;
2860 
2861     case PLUS:
2862       op0 = XEXP (x, 0);
2863       op1 = XEXP (x, 1);
2864       if (GET_MODE (x) == SImode)
2865 	{
2866 	  if (GET_CODE (op0) == MULT
2867 	      && GET_CODE (XEXP (op0, 1)) == CONST_INT)
2868 	    {
2869 	      HOST_WIDE_INT val = INTVAL (XEXP (op0, 1));
2870 	      if (val == 2 || val == 4)
2871 		{
2872 		  *total = cost2;
2873 		  *total += rtx_cost (XEXP (op0, 0), outer_code, opno, speed);
2874 		  *total += rtx_cost (op1, outer_code, opno, speed);
2875 		  return true;
2876 		}
2877 	    }
2878 	  *total = cost2;
2879 	  if (GET_CODE (op0) != REG
2880 	      && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
2881 	    *total += set_src_cost (op0, speed);
2882 #if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer
2883 	 towards creating too many induction variables.  */
2884 	  if (!reg_or_7bit_operand (op1, SImode))
2885 	    *total += set_src_cost (op1, speed);
2886 #endif
2887 	}
2888       else if (GET_MODE (x) == DImode)
2889 	{
2890 	  *total = 6 * cost2;
2891 	  if (GET_CODE (op1) != CONST_INT
2892 	      || !satisfies_constraint_Ks7 (op1))
2893 	    *total += rtx_cost (op1, PLUS, 1, speed);
2894 	  if (GET_CODE (op0) != REG
2895 	      && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
2896 	    *total += rtx_cost (op0, PLUS, 0, speed);
2897 	}
2898       return true;
2899 
2900     case MINUS:
2901       if (GET_MODE (x) == DImode)
2902 	*total = 6 * cost2;
2903       else
2904 	*total = cost2;
2905       return true;
2906 
2907     case ASHIFT:
2908     case ASHIFTRT:
2909     case LSHIFTRT:
2910       if (GET_MODE (x) == DImode)
2911 	*total = 6 * cost2;
2912       else
2913 	*total = cost2;
2914 
2915       op0 = XEXP (x, 0);
2916       op1 = XEXP (x, 1);
2917       if (GET_CODE (op0) != REG
2918 	  && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
2919 	*total += rtx_cost (op0, code, 0, speed);
2920 
2921       return true;
2922 
2923     case IOR:
2924     case AND:
2925     case XOR:
2926       op0 = XEXP (x, 0);
2927       op1 = XEXP (x, 1);
2928 
2929       /* Handle special cases of IOR: rotates, ALIGN insns, movstricthi_high.  */
2930       if (code == IOR)
2931 	{
2932 	  if ((GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT)
2933 	      || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == ZERO_EXTEND)
2934 	      || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
2935 	      || (GET_CODE (op0) == AND && GET_CODE (op1) == CONST_INT))
2936 	    {
2937 	      *total = cost2;
2938 	      return true;
2939 	    }
2940 	}
2941 
2942       if (GET_CODE (op0) != REG
2943 	  && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
2944 	*total += rtx_cost (op0, code, 0, speed);
2945 
2946       if (GET_MODE (x) == DImode)
2947 	{
2948 	  *total = 2 * cost2;
2949 	  return true;
2950 	}
2951       *total = cost2;
2952       if (GET_MODE (x) != SImode)
2953 	return true;
2954 
2955       if (code == AND)
2956 	{
2957 	  if (! rhs_andsi3_operand (XEXP (x, 1), SImode))
2958 	    *total += rtx_cost (XEXP (x, 1), code, 1, speed);
2959 	}
2960       else
2961 	{
2962 	  if (! regorlog2_operand (XEXP (x, 1), SImode))
2963 	    *total += rtx_cost (XEXP (x, 1), code, 1, speed);
2964 	}
2965 
2966       return true;
2967 
2968     case ZERO_EXTRACT:
2969     case SIGN_EXTRACT:
2970       if (outer_code == SET
2971 	  && XEXP (x, 1) == const1_rtx
2972 	  && GET_CODE (XEXP (x, 2)) == CONST_INT)
2973 	{
2974 	  *total = 2 * cost2;
2975 	  return true;
2976 	}
2977       /* fall through */
2978 
2979     case SIGN_EXTEND:
2980     case ZERO_EXTEND:
2981       *total = cost2;
2982       return true;
2983 
2984     case MULT:
2985 	{
2986 	  op0 = XEXP (x, 0);
2987 	  op1 = XEXP (x, 1);
2988 	  if (GET_CODE (op0) == GET_CODE (op1)
2989 	      && (GET_CODE (op0) == ZERO_EXTEND
2990 		  || GET_CODE (op0) == SIGN_EXTEND))
2991 	    {
2992 	      *total = COSTS_N_INSNS (1);
2993 	      op0 = XEXP (op0, 0);
2994 	      op1 = XEXP (op1, 0);
2995 	    }
2996 	  else if (!speed)
2997 	    *total = COSTS_N_INSNS (1);
2998 	  else
2999 	    *total = COSTS_N_INSNS (3);
3000 
3001 	  if (GET_CODE (op0) != REG
3002 	      && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
3003 	    *total += rtx_cost (op0, MULT, 0, speed);
3004 	  if (GET_CODE (op1) != REG
3005 	      && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
3006 	    *total += rtx_cost (op1, MULT, 1, speed);
3007 	}
3008       return true;
3009 
3010     case UDIV:
3011     case UMOD:
3012       *total = COSTS_N_INSNS (32);
3013       return true;
3014 
3015     case VEC_CONCAT:
3016     case VEC_SELECT:
3017       if (outer_code == SET)
3018 	*total = cost2;
3019       return true;
3020 
3021     default:
3022       return false;
3023     }
3024 }
3025 
3026 /* Used for communication between analyze_{push,pop}_multiple_operation
3027    (also used by the md predicates) and the corresponding output functions.  */
3028 static int first_preg_to_save, first_dreg_to_save;
3029 static int n_regs_to_save;
3030 
3031 int
3032 analyze_push_multiple_operation (rtx op)
3033 {
3034   int lastdreg = 8, lastpreg = 6;
3035   int i, group;
3036 
3037   first_preg_to_save = lastpreg;
3038   first_dreg_to_save = lastdreg;
3039   for (i = 1, group = 0; i < XVECLEN (op, 0) - 1; i++)
3040     {
3041       rtx t = XVECEXP (op, 0, i);
3042       rtx src, dest;
3043       int regno;
3044 
3045       if (GET_CODE (t) != SET)
3046 	return 0;
3047 
3048       src = SET_SRC (t);
3049       dest = SET_DEST (t);
3050       if (GET_CODE (dest) != MEM || ! REG_P (src))
3051 	return 0;
3052       dest = XEXP (dest, 0);
3053       if (GET_CODE (dest) != PLUS
3054 	  || ! REG_P (XEXP (dest, 0))
3055 	  || REGNO (XEXP (dest, 0)) != REG_SP
3056 	  || GET_CODE (XEXP (dest, 1)) != CONST_INT
3057 	  || INTVAL (XEXP (dest, 1)) != -i * 4)
3058 	return 0;
3059 
3060       regno = REGNO (src);
3061       if (group == 0)
3062 	{
3063 	  if (D_REGNO_P (regno))
3064 	    {
3065 	      group = 1;
3066 	      first_dreg_to_save = lastdreg = regno - REG_R0;
3067 	    }
3068 	  else if (regno >= REG_P0 && regno <= REG_P7)
3069 	    {
3070 	      group = 2;
3071 	      first_preg_to_save = lastpreg = regno - REG_P0;
3072 	    }
3073 	  else
3074 	    return 0;
3075 
3076 	  continue;
3077 	}
3078 
3079       if (group == 1)
3080 	{
3081 	  if (regno >= REG_P0 && regno <= REG_P7)
3082 	    {
3083 	      group = 2;
3084 	      first_preg_to_save = lastpreg = regno - REG_P0;
3085 	    }
3086 	  else if (regno != REG_R0 + lastdreg + 1)
3087 	    return 0;
3088 	  else
3089 	    lastdreg++;
3090 	}
3091       else if (group == 2)
3092 	{
3093 	  if (regno != REG_P0 + lastpreg + 1)
3094 	    return 0;
3095 	  lastpreg++;
3096 	}
3097     }
3098   n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
3099   return 1;
3100 }
3101 
3102 int
3103 analyze_pop_multiple_operation (rtx op)
3104 {
3105   int lastdreg = 8, lastpreg = 6;
3106   int i, group;
3107 
3108   for (i = 1, group = 0; i < XVECLEN (op, 0); i++)
3109     {
3110       rtx t = XVECEXP (op, 0, i);
3111       rtx src, dest;
3112       int regno;
3113 
3114       if (GET_CODE (t) != SET)
3115 	return 0;
3116 
3117       src = SET_SRC (t);
3118       dest = SET_DEST (t);
3119       if (GET_CODE (src) != MEM || ! REG_P (dest))
3120 	return 0;
3121       src = XEXP (src, 0);
3122 
3123       if (i == 1)
3124 	{
3125 	  if (! REG_P (src) || REGNO (src) != REG_SP)
3126 	    return 0;
3127 	}
3128       else if (GET_CODE (src) != PLUS
3129 	       || ! REG_P (XEXP (src, 0))
3130 	       || REGNO (XEXP (src, 0)) != REG_SP
3131 	       || GET_CODE (XEXP (src, 1)) != CONST_INT
3132 	       || INTVAL (XEXP (src, 1)) != (i - 1) * 4)
3133 	return 0;
3134 
3135       regno = REGNO (dest);
3136       if (group == 0)
3137 	{
3138 	  if (regno == REG_R7)
3139 	    {
3140 	      group = 1;
3141 	      lastdreg = 7;
3142 	    }
3143 	  else if (regno != REG_P0 + lastpreg - 1)
3144 	    return 0;
3145 	  else
3146 	    lastpreg--;
3147 	}
3148       else if (group == 1)
3149 	{
3150 	  if (regno != REG_R0 + lastdreg - 1)
3151 	    return 0;
3152 	  else
3153 	    lastdreg--;
3154 	}
3155     }
3156   first_dreg_to_save = lastdreg;
3157   first_preg_to_save = lastpreg;
3158   n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
3159   return 1;
3160 }
3161 
3162 /* Emit assembly code for one multi-register push described by INSN, with
3163    operands in OPERANDS.  */
3164 
3165 void
3166 output_push_multiple (rtx insn, rtx *operands)
3167 {
3168   char buf[80];
3169   int ok;
3170 
3171   /* Validate the insn again, and compute first_[dp]reg_to_save. */
3172   ok = analyze_push_multiple_operation (PATTERN (insn));
3173   gcc_assert (ok);
3174 
3175   if (first_dreg_to_save == 8)
3176     sprintf (buf, "[--sp] = ( p5:%d );\n", first_preg_to_save);
3177   else if (first_preg_to_save == 6)
3178     sprintf (buf, "[--sp] = ( r7:%d );\n", first_dreg_to_save);
3179   else
3180     sprintf (buf, "[--sp] = ( r7:%d, p5:%d );\n",
3181 	     first_dreg_to_save, first_preg_to_save);
3182 
3183   output_asm_insn (buf, operands);
3184 }
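
/* E.g. a push of R6, R7, P4 and P5 has first_dreg_to_save == 6 and
   first_preg_to_save == 4 and prints "[--sp] = ( r7:6, p5:4 );"; the
   pop function below prints the mirror image
   "( r7:6, p5:4 ) = [sp++];".  */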
3185 
3186 /* Emit assembly code for one multi-register pop described by INSN, with
3187    operands in OPERANDS.  */
3188 
3189 void
3190 output_pop_multiple (rtx insn, rtx *operands)
3191 {
3192   char buf[80];
3193   int ok;
3194 
3195   /* Validate the insn again, and compute first_[dp]reg_to_save. */
3196   ok = analyze_pop_multiple_operation (PATTERN (insn));
3197   gcc_assert (ok);
3198 
3199   if (first_dreg_to_save == 8)
3200     sprintf (buf, "( p5:%d ) = [sp++];\n", first_preg_to_save);
3201   else if (first_preg_to_save == 6)
3202     sprintf (buf, "( r7:%d ) = [sp++];\n", first_dreg_to_save);
3203   else
3204     sprintf (buf, "( r7:%d, p5:%d ) = [sp++];\n",
3205 	     first_dreg_to_save, first_preg_to_save);
3206 
3207   output_asm_insn (buf, operands);
3208 }
3209 
3210 /* Adjust DST and SRC by OFFSET bytes, and generate one move in mode MODE.  */
3211 
3212 static void
3213 single_move_for_movmem (rtx dst, rtx src, machine_mode mode, HOST_WIDE_INT offset)
3214 {
3215   rtx scratch = gen_reg_rtx (mode);
3216   rtx srcmem, dstmem;
3217 
3218   srcmem = adjust_address_nv (src, mode, offset);
3219   dstmem = adjust_address_nv (dst, mode, offset);
3220   emit_move_insn (scratch, srcmem);
3221   emit_move_insn (dstmem, scratch);
3222 }
3223 
3224 /* Expand a string move operation of COUNT_EXP bytes from SRC to DST, with
3225    alignment ALIGN_EXP.  Return true if successful, false if we should fall
3226    back on a different method.  */
3227 
3228 bool
3229 bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
3230 {
3231   rtx srcreg, destreg, countreg;
3232   HOST_WIDE_INT align = 0;
3233   unsigned HOST_WIDE_INT count = 0;
3234 
3235   if (GET_CODE (align_exp) == CONST_INT)
3236     align = INTVAL (align_exp);
3237   if (GET_CODE (count_exp) == CONST_INT)
3238     {
3239       count = INTVAL (count_exp);
3240 #if 0
3241       if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
3242 	return false;
3243 #endif
3244     }
3245 
3246   /* If optimizing for size, only do single copies inline.  */
3247   if (optimize_size)
3248     {
3249       if (count == 2 && align < 2)
3250 	return false;
3251       if (count == 4 && align < 4)
3252 	return false;
3253       if (count != 1 && count != 2 && count != 4)
3254 	return false;
3255     }
3256   if (align < 2 && count != 1)
3257     return false;
3258 
3259   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
3260   if (destreg != XEXP (dst, 0))
3261     dst = replace_equiv_address_nv (dst, destreg);
3262   srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
3263   if (srcreg != XEXP (src, 0))
3264     src = replace_equiv_address_nv (src, srcreg);
3265 
3266   if (count != 0 && align >= 2)
3267     {
3268       unsigned HOST_WIDE_INT offset = 0;
3269 
3270       if (align >= 4)
3271 	{
3272 	  if ((count & ~3) == 4)
3273 	    {
3274 	      single_move_for_movmem (dst, src, SImode, offset);
3275 	      offset = 4;
3276 	    }
3277 	  else if (count & ~3)
3278 	    {
3279 	      HOST_WIDE_INT new_count = ((count >> 2) & 0x3fffffff) - 1;
3280 	      countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count));
3281 
3282 	      emit_insn (gen_rep_movsi (destreg, srcreg, countreg, destreg, srcreg));
3283 	      cfun->machine->has_loopreg_clobber = true;
3284 	    }
3285 	  if (count & 2)
3286 	    {
3287 	      single_move_for_movmem (dst, src, HImode, offset);
3288 	      offset += 2;
3289 	    }
3290 	}
3291       else
3292 	{
3293 	  if ((count & ~1) == 2)
3294 	    {
3295 	      single_move_for_movmem (dst, src, HImode, offset);
3296 	      offset = 2;
3297 	    }
3298 	  else if (count & ~1)
3299 	    {
3300 	      HOST_WIDE_INT new_count = ((count >> 1) & 0x7fffffff) - 1;
3301 	      countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count));
3302 
3303 	      emit_insn (gen_rep_movhi (destreg, srcreg, countreg, destreg, srcreg));
3304 	      cfun->machine->has_loopreg_clobber = true;
3305 	    }
3306 	}
3307       if (count & 1)
3308 	{
3309 	  single_move_for_movmem (dst, src, QImode, offset);
3310 	}
3311       return true;
3312     }
3313   return false;
3314 }
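
/* As an illustration: a 22-byte copy with 4-byte alignment becomes a
   rep_movsi loop over five SImode words (countreg gets 4, one less
   than the word count) followed by one HImode move for the trailing
   two bytes; an odd count would end with a QImode move.  */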
3315 
3316 /* Compute the alignment for a local variable.
3317    TYPE is the data type, and ALIGN is the alignment that
3318    the object would ordinarily have.  The value of this macro is used
3319    instead of that alignment to align the object.  */
3320 
3321 unsigned
3322 bfin_local_alignment (tree type, unsigned align)
3323 {
3324   /* Increasing alignment for (relatively) big types allows the builtin
3325      memcpy to use 32-bit loads/stores.  */
3326   if (TYPE_SIZE (type)
3327       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
3328       && wi::gtu_p (TYPE_SIZE (type), 8)
3329       && align < 32)
3330     return 32;
3331   return align;
3332 }
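
/* Example: a local "char buf[12]" is only 8-bit aligned by default,
   but its 96-bit size exceeds the 8-bit threshold above, so it is
   given 32-bit alignment and block copies of it can use word
   accesses.  */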
3333 
3334 /* Implement TARGET_SCHED_ISSUE_RATE.  */
3335 
3336 static int
3337 bfin_issue_rate (void)
3338 {
3339   return 3;
3340 }
3341 
3342 static int
3343 bfin_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
3344 {
3345   enum attr_type dep_insn_type;
3346   int dep_insn_code_number;
3347 
3348   /* Anti and output dependencies have zero cost.  */
3349   if (REG_NOTE_KIND (link) != 0)
3350     return 0;
3351 
3352   dep_insn_code_number = recog_memoized (dep_insn);
3353 
3354   /* If we can't recognize the insns, we can't really do anything.  */
3355   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
3356     return cost;
3357 
3358   dep_insn_type = get_attr_type (dep_insn);
3359 
3360   if (dep_insn_type == TYPE_MOVE || dep_insn_type == TYPE_MCLD)
3361     {
3362       rtx pat = PATTERN (dep_insn);
3363       rtx dest, src;
3364 
3365       if (GET_CODE (pat) == PARALLEL)
3366 	pat = XVECEXP (pat, 0, 0);
3367       dest = SET_DEST (pat);
3368       src = SET_SRC (pat);
3369       if (! ADDRESS_REGNO_P (REGNO (dest))
3370 	  || ! (MEM_P (src) || D_REGNO_P (REGNO (src))))
3371 	return cost;
3372       return cost + (dep_insn_type == TYPE_MOVE ? 4 : 3);
3373     }
3374 
3375   return cost;
3376 }
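
/* E.g. if INSN depends on a "P0 = R1;" move, the returned cost is the
   base cost plus 4, modelling the stall before P0 can feed an address
   calculation (plus 3 when the producer is a load instead).  */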
3377 
3378 /* This function acts like NEXT_INSN, but is aware of three-insn bundles and
3379    skips all subsequent parallel instructions if INSN is the start of such
3380    a group.  */
3381 static rtx_insn *
3382 find_next_insn_start (rtx_insn *insn)
3383 {
3384   if (GET_MODE (insn) == SImode)
3385     {
3386       while (GET_MODE (insn) != QImode)
3387 	insn = NEXT_INSN (insn);
3388     }
3389   return NEXT_INSN (insn);
3390 }
3391 
3392 /* This function acts like PREV_INSN, but is aware of three-insn bundles:
3393    if the previous insn ends such a group, it returns the first insn
3394    of that group.  */
3395 static rtx_insn *
3396 find_prev_insn_start (rtx_insn *insn)
3397 {
3398   insn = PREV_INSN (insn);
3399   gcc_assert (GET_MODE (insn) != SImode);
3400   if (GET_MODE (insn) == QImode)
3401     {
3402       while (GET_MODE (PREV_INSN (insn)) == SImode)
3403 	insn = PREV_INSN (insn);
3404     }
3405   return insn;
3406 }
3407 
3408 /* Implement TARGET_CAN_USE_DOLOOP_P.  */
3409 
3410 static bool
3411 bfin_can_use_doloop_p (const widest_int &, const widest_int &iterations_max,
3412 		       unsigned int, bool)
3413 {
3414   /* Due to limitations in the hardware (an initial loop count of 0
3415      does not loop 2^32 times), we must avoid generating a hardware
3416      loop when we cannot rule out this case.  */
3417   if (!flag_unsafe_loop_optimizations
3418       && wi::geu_p (iterations_max, 0xFFFFFFFF))
3419     return false;
3420   return true;
3421 }
3422 
3423 /* Increment the counter for the number of loop instructions in the
3424    current function.  */
3425 
3426 void
3427 bfin_hardware_loop (void)
3428 {
3429   cfun->machine->has_hardware_loops++;
3430 }
3431 
3432 /* Maximum loop nesting depth.  */
3433 #define MAX_LOOP_DEPTH 2
3434 
3435 /* Maximum size of a loop.  */
3436 #define MAX_LOOP_LENGTH 2042
3437 
3438 /* Maximum distance of the LSETUP instruction from the loop start.  */
3439 #define MAX_LSETUP_DISTANCE 30
3440 
3441 /* Estimate the length of INSN conservatively.  */
3442 
3443 static int
3444 length_for_loop (rtx_insn *insn)
3445 {
3446   int length = 0;
3447   if (JUMP_P (insn) && any_condjump_p (insn) && !optimize_size)
3448     {
3449       if (ENABLE_WA_SPECULATIVE_SYNCS)
3450 	length = 8;
3451       else if (ENABLE_WA_SPECULATIVE_LOADS)
3452 	length = 6;
3453     }
3454   else if (LABEL_P (insn))
3455     {
3456       if (ENABLE_WA_SPECULATIVE_SYNCS)
3457 	length = 4;
3458     }
3459 
3460   if (NONDEBUG_INSN_P (insn))
3461     length += get_attr_length (insn);
3462 
3463   return length;
3464 }
3465 
3466 /* Optimize LOOP.  */
3467 
3468 static bool
3469 hwloop_optimize (hwloop_info loop)
3470 {
3471   basic_block bb;
3472   rtx_insn *insn, *last_insn;
3473   rtx loop_init, start_label, end_label;
3474   rtx iter_reg, scratchreg, scratch_init, scratch_init_insn;
3475   rtx lc_reg, lt_reg, lb_reg;
3476   rtx seq_end;
3477   rtx_insn *seq;
3478   int length;
3479   bool clobber0, clobber1;
3480 
3481   if (loop->depth > MAX_LOOP_DEPTH)
3482     {
3483       if (dump_file)
3484 	fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no);
3485       return false;
3486     }
3487 
3488   /* Get the loop iteration register.  */
3489   iter_reg = loop->iter_reg;
3490 
3491   gcc_assert (REG_P (iter_reg));
3492 
3493   scratchreg = NULL_RTX;
3494   scratch_init = iter_reg;
3495   scratch_init_insn = NULL_RTX;
3496   if (!PREG_P (iter_reg) && loop->incoming_src)
3497     {
3498       basic_block bb_in = loop->incoming_src;
3499       int i;
3500       for (i = REG_P0; i <= REG_P5; i++)
3501 	if ((df_regs_ever_live_p (i)
3502 	     || (funkind (TREE_TYPE (current_function_decl)) == SUBROUTINE
3503 		 && call_used_regs[i]))
3504 	    && !REGNO_REG_SET_P (df_get_live_out (bb_in), i))
3505 	  {
3506 	    scratchreg = gen_rtx_REG (SImode, i);
3507 	    break;
3508 	  }
3509       for (insn = BB_END (bb_in); insn != BB_HEAD (bb_in);
3510 	   insn = PREV_INSN (insn))
3511 	{
3512 	  rtx set;
3513 	  if (NOTE_P (insn) || BARRIER_P (insn))
3514 	    continue;
3515 	  set = single_set (insn);
3516 	  if (set && rtx_equal_p (SET_DEST (set), iter_reg))
3517 	    {
3518 	      if (CONSTANT_P (SET_SRC (set)))
3519 		{
3520 		  scratch_init = SET_SRC (set);
3521 		  scratch_init_insn = insn;
3522 		}
3523 	      break;
3524 	    }
3525 	  else if (reg_mentioned_p (iter_reg, PATTERN (insn)))
3526 	    break;
3527 	}
3528     }
3529 
3530   if (loop->incoming_src)
3531     {
3532       /* Make sure the predecessor is before the loop start label, as required by
3533 	 the LSETUP instruction.  */
3534       length = 0;
3535       insn = BB_END (loop->incoming_src);
3536       /* If we have to insert the LSETUP before a jump, count that jump in the
3537 	 length.  */
3538       if (vec_safe_length (loop->incoming) > 1
3539 	  || !(loop->incoming->last ()->flags & EDGE_FALLTHRU))
3540 	{
3541 	  gcc_assert (JUMP_P (insn));
3542 	  insn = PREV_INSN (insn);
3543 	}
3544 
3545       for (; insn && insn != loop->start_label; insn = NEXT_INSN (insn))
3546 	length += length_for_loop (insn);
3547 
3548       if (!insn)
3549 	{
3550 	  if (dump_file)
3551 	    fprintf (dump_file, ";; loop %d lsetup not before loop_start\n",
3552 		     loop->loop_no);
3553 	  return false;
3554 	}
3555 
3556       /* Account for the pop of a scratch register where necessary.  */
3557       if (!PREG_P (iter_reg) && scratchreg == NULL_RTX
3558 	  && ENABLE_WA_LOAD_LCREGS)
3559 	length += 2;
3560 
3561       if (length > MAX_LSETUP_DISTANCE)
3562 	{
3563 	  if (dump_file)
3564 	    fprintf (dump_file, ";; loop %d lsetup too far away\n", loop->loop_no);
3565 	  return false;
3566 	}
3567     }
3568 
3569   /* Check if start_label appears before loop_end and calculate the
3570      offset between them.  We calculate the length of instructions
3571      conservatively.  */
3572   length = 0;
3573   for (insn = loop->start_label;
3574        insn && insn != loop->loop_end;
3575        insn = NEXT_INSN (insn))
3576     length += length_for_loop (insn);
3577 
3578   if (!insn)
3579     {
3580       if (dump_file)
3581 	fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
3582 		 loop->loop_no);
3583       return false;
3584     }
3585 
3586   loop->length = length;
3587   if (loop->length > MAX_LOOP_LENGTH)
3588     {
3589       if (dump_file)
3590 	fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
3591       return false;
3592     }
3593 
3594   /* Scan all the blocks to make sure they don't use iter_reg.  */
3595   if (loop->iter_reg_used || loop->iter_reg_used_outside)
3596     {
3597       if (dump_file)
3598 	fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no);
3599       return false;
3600     }
3601 
3602   clobber0 = (TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC0)
3603 	      || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LB0)
3604 	      || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LT0));
3605   clobber1 = (TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC1)
3606 	      || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LB1)
3607 	      || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LT1));
3608   if (clobber0 && clobber1)
3609     {
3610       if (dump_file)
3611 	fprintf (dump_file, ";; loop %d no loop reg available\n",
3612 		 loop->loop_no);
3613       return false;
3614     }
3615 
3616   /* There should be an instruction before the loop_end instruction
3617      in the same basic block, and that instruction must not be any of
3618      - a JUMP
3619      - a CONDITIONAL BRANCH
3620      - a CALL
3621      - a CSYNC
3622      - an SSYNC
3623      - a return (RTS, RTN, etc.)  */
3624 
3625   bb = loop->tail;
3626   last_insn = find_prev_insn_start (loop->loop_end);
3627 
3628   while (1)
3629     {
3630       for (; last_insn != BB_HEAD (bb);
3631 	   last_insn = find_prev_insn_start (last_insn))
3632 	if (NONDEBUG_INSN_P (last_insn))
3633 	  break;
3634 
3635       if (last_insn != BB_HEAD (bb))
3636 	break;
3637 
3638       if (single_pred_p (bb)
3639 	  && single_pred_edge (bb)->flags & EDGE_FALLTHRU
3640 	  && single_pred (bb) != ENTRY_BLOCK_PTR_FOR_FN (cfun))
3641 	{
3642 	  bb = single_pred (bb);
3643 	  last_insn = BB_END (bb);
3644 	  continue;
3645 	}
3646       else
3647 	{
3648 	  last_insn = NULL;
3649 	  break;
3650 	}
3651     }
3652 
3653   if (!last_insn)
3654     {
3655       if (dump_file)
3656 	fprintf (dump_file, ";; loop %d has no last instruction\n",
3657 		 loop->loop_no);
3658       return false;
3659     }
3660 
3661   if (JUMP_P (last_insn) && !any_condjump_p (last_insn))
3662     {
3663       if (dump_file)
3664 	fprintf (dump_file, ";; loop %d has bad last instruction\n",
3665 		 loop->loop_no);
3666       return false;
3667     }
3668   /* In all other cases, try to replace a bad last insn with a nop.  */
3669   else if (JUMP_P (last_insn)
3670 	   || CALL_P (last_insn)
3671 	   || get_attr_type (last_insn) == TYPE_SYNC
3672 	   || get_attr_type (last_insn) == TYPE_CALL
3673 	   || get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI
3674 	   || recog_memoized (last_insn) == CODE_FOR_return_internal
3675 	   || GET_CODE (PATTERN (last_insn)) == ASM_INPUT
3676 	   || asm_noperands (PATTERN (last_insn)) >= 0)
3677     {
3678       if (loop->length + 2 > MAX_LOOP_LENGTH)
3679 	{
3680 	  if (dump_file)
3681 	    fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
3682 	  return false;
3683 	}
3684       if (dump_file)
3685 	fprintf (dump_file, ";; loop %d has bad last insn; replace with nop\n",
3686 		 loop->loop_no);
3687 
3688       last_insn = emit_insn_after (gen_forced_nop (), last_insn);
3689     }
3690 
3691   loop->last_insn = last_insn;
3692 
3693   /* The loop is good for replacement.  */
3694   start_label = loop->start_label;
3695   end_label = gen_label_rtx ();
3696   iter_reg = loop->iter_reg;
3697 
3698   if (loop->depth == 1 && !clobber1)
3699     {
3700       lc_reg = gen_rtx_REG (SImode, REG_LC1);
3701       lb_reg = gen_rtx_REG (SImode, REG_LB1);
3702       lt_reg = gen_rtx_REG (SImode, REG_LT1);
3703       SET_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC1);
3704     }
3705   else
3706     {
3707       lc_reg = gen_rtx_REG (SImode, REG_LC0);
3708       lb_reg = gen_rtx_REG (SImode, REG_LB0);
3709       lt_reg = gen_rtx_REG (SImode, REG_LT0);
3710       SET_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC0);
3711     }
3712 
3713   loop->end_label = end_label;
3714 
3715   /* Create a sequence containing the loop setup.  */
3716   start_sequence ();
3717 
3718   /* LSETUP only accepts P registers.  If we have one, we can use it,
3719      otherwise there are several ways of working around the problem.
3720      If we're not affected by anomaly 312, we can load the LC register
3721      from any iteration register, and use LSETUP without initialization.
3722      If we've found a P scratch register that's not live here, we can
3723      instead copy the iter_reg into that and use an initializing LSETUP.
3724      If all else fails, push and pop P0 and use it as a scratch.  */
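  /* As a hedged illustration (schematic assembly, not verbatim compiler
     output; P1, P5 and R2 are example registers), the four cases below
     correspond roughly to:

	LSETUP (.Lstart, .Lend) LC0 = P1;	// iter_reg is a P register

	LC0 = R2;				// no anomaly 312 workaround needed
	LSETUP (.Lstart, .Lend);

	P5 = R2;				// a free P scratch register was found
	LSETUP (.Lstart, .Lend) LC0 = P5;

	[--SP] = P0; P0 = R2;			// last resort: borrow P0
	LSETUP (.Lstart, .Lend) LC0 = P0;
	P0 = [SP++];  */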
3725   if (P_REGNO_P (REGNO (iter_reg)))
3726     {
3727       loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
3728 					    lb_reg, end_label,
3729 					    lc_reg, iter_reg);
3730       seq_end = emit_insn (loop_init);
3731     }
3732   else if (!ENABLE_WA_LOAD_LCREGS && DPREG_P (iter_reg))
3733     {
3734       emit_insn (gen_movsi (lc_reg, iter_reg));
3735       loop_init = gen_lsetup_without_autoinit (lt_reg, start_label,
3736 					       lb_reg, end_label,
3737 					       lc_reg);
3738       seq_end = emit_insn (loop_init);
3739     }
3740   else if (scratchreg != NULL_RTX)
3741     {
3742       emit_insn (gen_movsi (scratchreg, scratch_init));
3743       loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
3744 					    lb_reg, end_label,
3745 					    lc_reg, scratchreg);
3746       seq_end = emit_insn (loop_init);
3747       if (scratch_init_insn != NULL_RTX)
3748 	delete_insn (scratch_init_insn);
3749     }
3750   else
3751     {
3752       rtx p0reg = gen_rtx_REG (SImode, REG_P0);
3753       rtx push = gen_frame_mem (SImode,
3754 				gen_rtx_PRE_DEC (SImode, stack_pointer_rtx));
3755       rtx pop = gen_frame_mem (SImode,
3756 			       gen_rtx_POST_INC (SImode, stack_pointer_rtx));
3757       emit_insn (gen_movsi (push, p0reg));
3758       emit_insn (gen_movsi (p0reg, scratch_init));
3759       loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
3760 					    lb_reg, end_label,
3761 					    lc_reg, p0reg);
3762       emit_insn (loop_init);
3763       seq_end = emit_insn (gen_movsi (p0reg, pop));
3764       if (scratch_init_insn != NULL_RTX)
3765 	delete_insn (scratch_init_insn);
3766     }
3767 
3768   if (dump_file)
3769     {
3770       fprintf (dump_file, ";; replacing loop %d initializer with\n",
3771 	       loop->loop_no);
3772       print_rtl_single (dump_file, loop_init);
3773       fprintf (dump_file, ";; replacing loop %d terminator with\n",
3774 	       loop->loop_no);
3775       print_rtl_single (dump_file, loop->loop_end);
3776     }
3777 
3778   /* If the loop isn't entered at the top, also create a jump to the entry
3779      point.  */
3780   if (!loop->incoming_src && loop->head != loop->incoming_dest)
3781     {
3782       rtx label = BB_HEAD (loop->incoming_dest);
3783       /* If we're jumping to the final basic block in the loop, and there's
3784 	 only one cheap instruction before the end (typically an increment of
3785 	 an induction variable), we can just emit a copy here instead of a
3786 	 jump.  */
3787       if (loop->incoming_dest == loop->tail
3788 	  && next_real_insn (label) == last_insn
3789 	  && asm_noperands (last_insn) < 0
3790 	  && GET_CODE (PATTERN (last_insn)) == SET)
3791 	{
3792 	  seq_end = emit_insn (copy_rtx (PATTERN (last_insn)));
3793 	}
3794       else
3795 	{
3796 	  emit_jump_insn (gen_jump (label));
3797 	  seq_end = emit_barrier ();
3798 	}
3799     }
3800 
3801   seq = get_insns ();
3802   end_sequence ();
3803 
3804   if (loop->incoming_src)
3805     {
3806       rtx_insn *prev = BB_END (loop->incoming_src);
3807       if (vec_safe_length (loop->incoming) > 1
3808 	  || !(loop->incoming->last ()->flags & EDGE_FALLTHRU))
3809 	{
3810 	  gcc_assert (JUMP_P (prev));
3811 	  prev = PREV_INSN (prev);
3812 	}
3813       emit_insn_after (seq, prev);
3814     }
3815   else
3816     {
3817       basic_block new_bb;
3818       edge e;
3819       edge_iterator ei;
3820 
3821 #ifdef ENABLE_CHECKING
3822       if (loop->head != loop->incoming_dest)
3823 	{
3824 	  /* We aren't entering the loop at the top.  Since we've established
3825 	     that the loop is entered only at one point, this means there
3826 	     can't be fallthru edges into the head.  Any such fallthru edges
3827 	     would become invalid when we insert the new block, so verify
3828 	     that this does not in fact happen.  */
3829 	  FOR_EACH_EDGE (e, ei, loop->head->preds)
3830 	    gcc_assert (!(e->flags & EDGE_FALLTHRU));
3831 	}
3832 #endif
3833 
3834       emit_insn_before (seq, BB_HEAD (loop->head));
3835       seq = emit_label_before (gen_label_rtx (), seq);
3836 
3837       new_bb = create_basic_block (seq, seq_end, loop->head->prev_bb);
3838       FOR_EACH_EDGE (e, ei, loop->incoming)
3839 	{
3840 	  if (!(e->flags & EDGE_FALLTHRU)
3841 	      || e->dest != loop->head)
3842 	    redirect_edge_and_branch_force (e, new_bb);
3843 	  else
3844 	    redirect_edge_succ (e, new_bb);
3845 	}
3846       e = make_edge (new_bb, loop->head, 0);
3847     }
3848 
3849   delete_insn (loop->loop_end);
3850   /* Insert the loop end label before the last instruction of the loop.  */
3851   emit_label_before (loop->end_label, loop->last_insn);
3852 
3853   return true;
3854 }
3855 
3856 /* A callback for the hw-doloop pass.  Called when a loop we have discovered
3857    turns out not to be optimizable; we have to split the doloop_end pattern
3858    into a subtract and a test.  */
3859 static void
3860 hwloop_fail (hwloop_info loop)
3861 {
3862   rtx insn = loop->loop_end;
3863 
3864   if (DPREG_P (loop->iter_reg))
3865     {
3866       /* If loop->iter_reg is a DREG or PREG, we can split it here
3867	 without a scratch register.  */
3868       rtx insn, test;
3869 
3870       emit_insn_before (gen_addsi3 (loop->iter_reg,
3871 				    loop->iter_reg,
3872 				    constm1_rtx),
3873 			loop->loop_end);
3874 
3875       test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
3876       insn = emit_jump_insn_before (gen_cbranchsi4 (test,
3877 						    loop->iter_reg, const0_rtx,
3878 						    loop->start_label),
3879 				    loop->loop_end);
3880 
3881       JUMP_LABEL (insn) = loop->start_label;
3882       LABEL_NUSES (loop->start_label)++;
3883       delete_insn (loop->loop_end);
3884     }
3885   else
3886     {
3887       splitting_loops = 1;
3888       try_split (PATTERN (insn), insn, 1);
3889       splitting_loops = 0;
3890     }
3891 }
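/* A hedged sketch of the DPREG case above, with R2 standing in for
   loop->iter_reg: the loop_end insn is replaced by an explicit decrement
   and a conditional branch back to the start label, roughly

	R2 += -1;
	CC = R2 == 0;
	IF !CC JUMP .Lstart_label;  */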
3892 
3893 /* A callback for the hw-doloop pass.  This function examines INSN; if
3894    it is a loop_end pattern we recognize, return the reg rtx for the
3895    loop counter.  Otherwise, return NULL_RTX.  */
3896 
3897 static rtx
3898 hwloop_pattern_reg (rtx_insn *insn)
3899 {
3900   rtx reg;
3901 
3902   if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end)
3903     return NULL_RTX;
3904 
3905   reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1));
3906   if (!REG_P (reg))
3907     return NULL_RTX;
3908   return reg;
3909 }
3910 
3911 static struct hw_doloop_hooks bfin_doloop_hooks =
3912 {
3913   hwloop_pattern_reg,
3914   hwloop_optimize,
3915   hwloop_fail
3916 };
3917 
3918 /* Run from machine_dependent_reorg, this pass looks for doloop_end insns
3919    and tries to rewrite the RTL of these loops so that proper Blackfin
3920    hardware loops are generated.  */
3921 
3922 static void
3923 bfin_reorg_loops (void)
3924 {
3925   reorg_loops (true, &bfin_doloop_hooks);
3926 }
3927 
3928 /* Try to bundle the (up to three) insns found in SLOT for parallel issue.
3929    Returns true if we modified the insn chain, false otherwise.  */
3930 static bool
3931 gen_one_bundle (rtx_insn *slot[3])
3932 {
3933   gcc_assert (slot[1] != NULL_RTX);
3934 
3935   /* Don't add extra NOPs if optimizing for size.  */
3936   if (optimize_size
3937       && (slot[0] == NULL_RTX || slot[2] == NULL_RTX))
3938     return false;
3939 
3940   /* Verify that we really can do the multi-issue.  */
3941   if (slot[0])
3942     {
3943       rtx_insn *t = NEXT_INSN (slot[0]);
3944       while (t != slot[1])
3945 	{
3946 	  if (! NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_DELETED)
3947 	    return false;
3948 	  t = NEXT_INSN (t);
3949 	}
3950     }
3951   if (slot[2])
3952     {
3953       rtx_insn *t = NEXT_INSN (slot[1]);
3954       while (t != slot[2])
3955 	{
3956 	  if (! NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_DELETED)
3957 	    return false;
3958 	  t = NEXT_INSN (t);
3959 	}
3960     }
3961 
3962   if (slot[0] == NULL_RTX)
3963     {
3964       slot[0] = emit_insn_before (gen_mnop (), slot[1]);
3965       df_insn_rescan (slot[0]);
3966     }
3967   if (slot[2] == NULL_RTX)
3968     {
3969       slot[2] = emit_insn_after (gen_forced_nop (), slot[1]);
3970       df_insn_rescan (slot[2]);
3971     }
3972 
3973   /* Avoid line number information being printed inside one bundle.  */
3974   if (INSN_LOCATION (slot[1])
3975       && INSN_LOCATION (slot[1]) != INSN_LOCATION (slot[0]))
3976     INSN_LOCATION (slot[1]) = INSN_LOCATION (slot[0]);
3977   if (INSN_LOCATION (slot[2])
3978       && INSN_LOCATION (slot[2]) != INSN_LOCATION (slot[0]))
3979     INSN_LOCATION (slot[2]) = INSN_LOCATION (slot[0]);
3980 
3981   /* Terminate them with "|| " instead of ";" in the output.  */
3982   PUT_MODE (slot[0], SImode);
3983   PUT_MODE (slot[1], SImode);
3984   /* Terminate the bundle, for the benefit of reorder_var_tracking_notes.  */
3985   PUT_MODE (slot[2], QImode);
3986   return true;
3987 }
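/* Illustrative only: a filled bundle is later printed with "||"
   separators instead of ";" terminators, along the lines of

	R0 = R1 + R2 (NS) || R3 = [P0++] || R5 = [P1++];

   where the SImode on slots 0 and 1 selects the "||" form and the
   QImode on slot 2 ends the bundle.  */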
3988 
3989 /* Go through all insns, and use the information generated during
3990    scheduling to mark bundles of instructions that are issued
3991    simultaneously.  */
3992 
3993 static void
3994 bfin_gen_bundles (void)
3995 {
3996   basic_block bb;
3997   FOR_EACH_BB_FN (bb, cfun)
3998     {
3999       rtx_insn *insn, *next;
4000       rtx_insn *slot[3];
4001       int n_filled = 0;
4002 
4003       slot[0] = slot[1] = slot[2] = NULL;
4004       for (insn = BB_HEAD (bb);; insn = next)
4005 	{
4006 	  int at_end;
4007 	  rtx delete_this = NULL_RTX;
4008 
4009 	  if (NONDEBUG_INSN_P (insn))
4010 	    {
4011 	      enum attr_type type = get_attr_type (insn);
4012 
4013 	      if (type == TYPE_STALL)
4014 		{
4015 		  gcc_assert (n_filled == 0);
4016 		  delete_this = insn;
4017 		}
4018 	      else
4019 		{
4020 		  if (type == TYPE_DSP32 || type == TYPE_DSP32SHIFTIMM)
4021 		    slot[0] = insn;
4022 		  else if (slot[1] == NULL_RTX)
4023 		    slot[1] = insn;
4024 		  else
4025 		    slot[2] = insn;
4026 		  n_filled++;
4027 		}
4028 	    }
4029 
4030 	  next = NEXT_INSN (insn);
4031 	  while (next && insn != BB_END (bb)
4032 		 && !(INSN_P (next)
4033 		      && GET_CODE (PATTERN (next)) != USE
4034 		      && GET_CODE (PATTERN (next)) != CLOBBER))
4035 	    {
4036 	      insn = next;
4037 	      next = NEXT_INSN (insn);
4038 	    }
4039 
4040 	  /* BB_END can change due to emitting extra NOPs, so check here.  */
4041 	  at_end = insn == BB_END (bb);
4042 	  if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode))
4043 	    {
4044 	      if ((n_filled < 2
4045 		   || !gen_one_bundle (slot))
4046 		  && slot[0] != NULL_RTX)
4047 		{
4048 		  rtx pat = PATTERN (slot[0]);
4049 		  if (GET_CODE (pat) == SET
4050 		      && GET_CODE (SET_SRC (pat)) == UNSPEC
4051 		      && XINT (SET_SRC (pat), 1) == UNSPEC_32BIT)
4052 		    {
4053 		      SET_SRC (pat) = XVECEXP (SET_SRC (pat), 0, 0);
4054 		      INSN_CODE (slot[0]) = -1;
4055 		      df_insn_rescan (slot[0]);
4056 		    }
4057 		}
4058 	      n_filled = 0;
4059 	      slot[0] = slot[1] = slot[2] = NULL;
4060 	    }
4061 	  if (delete_this != NULL_RTX)
4062 	    delete_insn (delete_this);
4063 	  if (at_end)
4064 	    break;
4065 	}
4066     }
4067 }
4068 
4069 /* Ensure that no var tracking notes are emitted in the middle of a
4070    three-instruction bundle.  */
4071 
4072 static void
4073 reorder_var_tracking_notes (void)
4074 {
4075   basic_block bb;
4076   FOR_EACH_BB_FN (bb, cfun)
4077     {
4078       rtx_insn *insn, *next;
4079       rtx_insn *queue = NULL;
4080       bool in_bundle = false;
4081 
4082       for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = next)
4083 	{
4084 	  next = NEXT_INSN (insn);
4085 
4086 	  if (INSN_P (insn))
4087 	    {
4088 	      /* Emit queued up notes at the last instruction of a bundle.  */
4089 	      if (GET_MODE (insn) == QImode)
4090 		{
4091 		  while (queue)
4092 		    {
4093 		      rtx_insn *next_queue = PREV_INSN (queue);
4094 		      SET_PREV_INSN (NEXT_INSN (insn)) = queue;
4095 		      SET_NEXT_INSN (queue) = NEXT_INSN (insn);
4096 		      SET_NEXT_INSN (insn) = queue;
4097 		      SET_PREV_INSN (queue) = insn;
4098 		      queue = next_queue;
4099 		    }
4100 		  in_bundle = false;
4101 		}
4102 	      else if (GET_MODE (insn) == SImode)
4103 		in_bundle = true;
4104 	    }
4105 	  else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
4106 	    {
4107 	      if (in_bundle)
4108 		{
4109 		  rtx_insn *prev = PREV_INSN (insn);
4110 		  SET_PREV_INSN (next) = prev;
4111 		  SET_NEXT_INSN (prev) = next;
4112 
4113 		  SET_PREV_INSN (insn) = queue;
4114 		  queue = insn;
4115 		}
4116 	    }
4117 	}
4118     }
4119 }
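/* Schematically: if insns A, B and C form one bundle and var-location
   notes n1 and n2 were interleaved as  A n1 B n2 C,  the loop above
   re-links the chain to  A B C n1 n2,  i.e. the notes are queued up and
   re-emitted after the QImode insn that terminates the bundle.  */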
4120 
4121 /* On some silicon revisions, functions that reach their RTS within a few
4122    cycles (four, in this workaround) can cause unpredictable behaviour.
4123    Work around this by adding NOPs as needed.  */
4124 static void
4125 workaround_rts_anomaly (void)
4126 {
4127   rtx_insn *insn, *first_insn = NULL;
4128   int cycles = 4;
4129 
4130   if (! ENABLE_WA_RETS)
4131     return;
4132 
4133   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4134     {
4135       rtx pat;
4136 
4137       if (BARRIER_P (insn))
4138 	return;
4139 
4140       if (NOTE_P (insn) || LABEL_P (insn))
4141 	continue;
4142 
4143       if (JUMP_TABLE_DATA_P (insn))
4144 	continue;
4145 
4146       if (first_insn == NULL_RTX)
4147 	first_insn = insn;
4148       pat = PATTERN (insn);
4149       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
4150 	  || GET_CODE (pat) == ASM_INPUT
4151 	  || asm_noperands (pat) >= 0)
4152 	continue;
4153 
4154       if (CALL_P (insn))
4155 	return;
4156 
4157       if (JUMP_P (insn))
4158 	{
4159 	  if (recog_memoized (insn) == CODE_FOR_return_internal)
4160 	    break;
4161 
4162 	  /* Nothing to worry about for direct jumps.  */
4163 	  if (!any_condjump_p (insn))
4164 	    return;
4165 	  if (cycles <= 1)
4166 	    return;
4167 	  cycles--;
4168 	}
4169       else if (INSN_P (insn))
4170 	{
4171 	  rtx pat = PATTERN (insn);
4172 	  int this_cycles = 1;
4173 
4174 	  if (GET_CODE (pat) == PARALLEL)
4175 	    {
4176 	      if (analyze_push_multiple_operation (pat)
4177 		  || analyze_pop_multiple_operation (pat))
4178 		this_cycles = n_regs_to_save;
4179 	    }
4180 	  else
4181 	    {
4182 	      int icode = recog_memoized (insn);
4183 
4184 	      if (icode == CODE_FOR_link)
4185 		this_cycles = 4;
4186 	      else if (icode == CODE_FOR_unlink)
4187 		this_cycles = 3;
4188 	      else if (icode == CODE_FOR_mulsi3)
4189 		this_cycles = 5;
4190 	    }
4191 	  if (this_cycles >= cycles)
4192 	    return;
4193 
4194 	  cycles -= this_cycles;
4195 	}
4196     }
4197   while (cycles > 0)
4198     {
4199       emit_insn_before (gen_nop (), first_insn);
4200       cycles--;
4201     }
4202 }
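/* Illustrative effect (a sketch): a function whose body is nothing but
   the return insn consumes none of the four-cycle budget, so it ends up
   roughly as

	NOP; NOP; NOP; NOP;
	RTS;  */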
4203 
4204 /* Return an insn type for INSN that can be used by the caller for anomaly
4205    workarounds.  This differs from plain get_attr_type in that it handles
4206    SEQUENCEs.  */
4207 
4208 static enum attr_type
4209 type_for_anomaly (rtx_insn *insn)
4210 {
4211   rtx pat = PATTERN (insn);
4212   if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
4213     {
4214       enum attr_type t;
4215       t = get_attr_type (seq->insn (1));
4216       if (t == TYPE_MCLD)
4217 	return t;
4218       t = get_attr_type (seq->insn (2));
4219       if (t == TYPE_MCLD)
4220 	return t;
4221       return TYPE_MCST;
4222     }
4223   else
4224     return get_attr_type (insn);
4225 }
4226 
4227 /* Return true iff the address found in MEM is based on the register
4228    NP_REG and optionally has a positive offset.  */
4229 static bool
4230 harmless_null_pointer_p (rtx mem, int np_reg)
4231 {
4232   mem = XEXP (mem, 0);
4233   if (GET_CODE (mem) == POST_INC || GET_CODE (mem) == POST_DEC)
4234     mem = XEXP (mem, 0);
4235   if (REG_P (mem) && (int) REGNO (mem) == np_reg)
4236     return true;
4237   if (GET_CODE (mem) == PLUS
4238       && REG_P (XEXP (mem, 0)) && (int) REGNO (XEXP (mem, 0)) == np_reg)
4239     {
4240       mem = XEXP (mem, 1);
4241       if (GET_CODE (mem) == CONST_INT && INTVAL (mem) > 0)
4242 	return true;
4243     }
4244   return false;
4245 }
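/* For illustration, with NP_REG being P2 the following address forms are
   considered harmless:  [P2], [P2++], [P2--], [P2 + 4];  whereas
   [P2 - 4], or an address based on any other register, is not.  */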
4246 
4247 /* Return true if INSN contains a load that may trap.  */
4248 
4249 static bool
4250 trapping_loads_p (rtx_insn *insn, int np_reg, bool after_np_branch)
4251 {
4252   rtx mem = SET_SRC (single_set (insn));
4253 
4254   if (!after_np_branch)
4255     np_reg = -1;
4256   return ((np_reg == -1 || !harmless_null_pointer_p (mem, np_reg))
4257 	  && may_trap_p (mem));
4258 }
4259 
4260 /* Return INSN if it is of TYPE_MCLD.  Alternatively, if INSN is the start of
4261    a three-insn bundle, see if one of them is a load and return that if so.
4262    Return NULL if the insn does not contain loads.  */
4263 static rtx_insn *
4264 find_load (rtx_insn *insn)
4265 {
4266   if (!NONDEBUG_INSN_P (insn))
4267     return NULL;
4268   if (get_attr_type (insn) == TYPE_MCLD)
4269     return insn;
4270   if (GET_MODE (insn) != SImode)
4271     return NULL;
4272   do {
4273     insn = NEXT_INSN (insn);
4274     if ((GET_MODE (insn) == SImode || GET_MODE (insn) == QImode)
4275 	&& get_attr_type (insn) == TYPE_MCLD)
4276       return insn;
4277   } while (GET_MODE (insn) != QImode);
4278   return NULL;
4279 }
4280 
4281 /* Determine whether PAT is an indirect call pattern.  */
4282 static bool
4283 indirect_call_p (rtx pat)
4284 {
4285   if (GET_CODE (pat) == PARALLEL)
4286     pat = XVECEXP (pat, 0, 0);
4287   if (GET_CODE (pat) == SET)
4288     pat = SET_SRC (pat);
4289   gcc_assert (GET_CODE (pat) == CALL);
4290   pat = XEXP (pat, 0);
4291   gcc_assert (GET_CODE (pat) == MEM);
4292   pat = XEXP (pat, 0);
4293 
4294   return REG_P (pat);
4295 }
4296 
4297 /* During workaround_speculation, track whether we're in the shadow of a
4298    conditional branch that tests a P register for NULL.  If so, we can omit
4299    emitting NOPs if we see a load from that P register, since a speculative
4300    access at address 0 isn't a problem, and the load is executed in all other
4301    cases anyway.
4302    Global for communication with note_np_check_stores through
4303    note_stores.  */
4304 int np_check_regno = -1;
4305 bool np_after_branch = false;
4306 
4307 /* Subroutine of workaround_speculation, called through note_stores.  */
4308 static void
4309 note_np_check_stores (rtx x, const_rtx pat ATTRIBUTE_UNUSED,
4310 		      void *data ATTRIBUTE_UNUSED)
4311 {
4312   if (REG_P (x) && (REGNO (x) == REG_CC || (int) REGNO (x) == np_check_regno))
4313     np_check_regno = -1;
4314 }
4315 
4316 static void
4317 workaround_speculation (void)
4318 {
4319   rtx_insn *insn, *next;
4320   rtx_insn *last_condjump = NULL;
4321   int cycles_since_jump = INT_MAX;
4322   int delay_added = 0;
4323 
4324   if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
4325       && ! ENABLE_WA_INDIRECT_CALLS)
4326     return;
4327 
4328   /* First pass: find predicted-false branches; if something after them
4329      needs nops, insert them or change the branch to predict true.  */
4330   for (insn = get_insns (); insn; insn = next)
4331     {
4332       rtx pat;
4333       int delay_needed = 0;
4334 
4335       next = find_next_insn_start (insn);
4336 
4337       if (NOTE_P (insn) || BARRIER_P (insn))
4338 	continue;
4339       if (JUMP_TABLE_DATA_P (insn))
4340 	continue;
4341 
4342       if (LABEL_P (insn))
4343 	{
4344 	  np_check_regno = -1;
4345 	  continue;
4346 	}
4347 
4348       pat = PATTERN (insn);
4349       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
4350 	continue;
4351 
4352       if (GET_CODE (pat) == ASM_INPUT || asm_noperands (pat) >= 0)
4353 	{
4354 	  np_check_regno = -1;
4355 	  continue;
4356 	}
4357 
4358       if (JUMP_P (insn))
4359 	{
4360 	  /* Is this a condjump based on a null pointer comparison we saw
4361 	     earlier?  */
4362 	  if (np_check_regno != -1
4363 	      && recog_memoized (insn) == CODE_FOR_cbranchbi4)
4364 	    {
4365 	      rtx op = XEXP (SET_SRC (PATTERN (insn)), 0);
4366 	      gcc_assert (GET_CODE (op) == EQ || GET_CODE (op) == NE);
4367 	      if (GET_CODE (op) == NE)
4368 		np_after_branch = true;
4369 	    }
4370 	  if (any_condjump_p (insn)
4371 	      && ! cbranch_predicted_taken_p (insn))
4372 	    {
4373 	      last_condjump = insn;
4374 	      delay_added = 0;
4375 	      cycles_since_jump = 0;
4376 	    }
4377 	  else
4378 	    cycles_since_jump = INT_MAX;
4379 	}
4380       else if (CALL_P (insn))
4381 	{
4382 	  np_check_regno = -1;
4383 	  if (cycles_since_jump < INT_MAX)
4384 	    cycles_since_jump++;
4385 	  if (indirect_call_p (pat) && ENABLE_WA_INDIRECT_CALLS)
4386 	    {
4387 	      delay_needed = 3;
4388 	    }
4389 	}
4390       else if (NONDEBUG_INSN_P (insn))
4391 	{
4392 	  rtx_insn *load_insn = find_load (insn);
4393 	  enum attr_type type = type_for_anomaly (insn);
4394 
4395 	  if (cycles_since_jump < INT_MAX)
4396 	    cycles_since_jump++;
4397 
4398 	  /* Detect a comparison of a P register with zero.  If we later
4399 	     see a condjump based on it, we have found a null pointer
4400 	     check.  */
4401 	  if (recog_memoized (insn) == CODE_FOR_compare_eq)
4402 	    {
4403 	      rtx src = SET_SRC (PATTERN (insn));
4404 	      if (REG_P (XEXP (src, 0))
4405 		  && P_REGNO_P (REGNO (XEXP (src, 0)))
4406 		  && XEXP (src, 1) == const0_rtx)
4407 		{
4408 		  np_check_regno = REGNO (XEXP (src, 0));
4409 		  np_after_branch = false;
4410 		}
4411 	      else
4412 		np_check_regno = -1;
4413 	    }
4414 
4415 	  if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
4416 	    {
4417 	      if (trapping_loads_p (load_insn, np_check_regno,
4418 				    np_after_branch))
4419 		delay_needed = 4;
4420 	    }
4421 	  else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
4422 	    delay_needed = 3;
4423 
4424 	  /* See if we need to forget about a null pointer comparison
4425 	     we found earlier.  */
4426 	  if (recog_memoized (insn) != CODE_FOR_compare_eq)
4427 	    {
4428 	      note_stores (PATTERN (insn), note_np_check_stores, NULL);
4429 	      if (np_check_regno != -1)
4430 		{
4431 		  if (find_regno_note (insn, REG_INC, np_check_regno))
4432 		    np_check_regno = -1;
4433 		}
4434 	    }
4435 
4436 	}
4437 
4438       if (delay_needed > cycles_since_jump
4439 	  && (delay_needed - cycles_since_jump) > delay_added)
4440 	{
4441 	  rtx pat1;
4442 	  int num_clobbers;
4443 	  rtx *op = recog_data.operand;
4444 
4445 	  delay_needed -= cycles_since_jump;
4446 
4447 	  extract_insn (last_condjump);
4448 	  if (optimize_size)
4449 	    {
4450 	      pat1 = gen_cbranch_predicted_taken (op[0], op[1], op[2],
4451 						 op[3]);
4452 	      cycles_since_jump = INT_MAX;
4453 	    }
4454 	  else
4455 	    {
4456 	      /* Do not adjust cycles_since_jump in this case, so that
4457 		 we'll increase the number of NOPs for a subsequent insn
4458 		 if necessary.  */
4459 	      pat1 = gen_cbranch_with_nops (op[0], op[1], op[2], op[3],
4460 					    GEN_INT (delay_needed));
4461 	      delay_added = delay_needed;
4462 	    }
4463 	  PATTERN (last_condjump) = pat1;
4464 	  INSN_CODE (last_condjump) = recog (pat1, insn, &num_clobbers);
4465 	}
4466       if (CALL_P (insn))
4467 	{
4468 	  cycles_since_jump = INT_MAX;
4469 	  delay_added = 0;
4470 	}
4471     }
4472 
4473   /* Second pass: for predicted-true branches, see if anything at the
4474      branch destination needs extra nops.  */
4475   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4476     {
4477       int cycles_since_jump;
4478       if (JUMP_P (insn)
4479 	  && any_condjump_p (insn)
4480 	  && (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken
4481 	      || cbranch_predicted_taken_p (insn)))
4482 	{
4483 	  rtx_insn *target = JUMP_LABEL_AS_INSN (insn);
4484 	  rtx label = target;
4485 	  rtx_insn *next_tgt;
4486 
4487 	  cycles_since_jump = 0;
4488 	  for (; target && cycles_since_jump < 3; target = next_tgt)
4489 	    {
4490 	      rtx pat;
4491 
4492 	      next_tgt = find_next_insn_start (target);
4493 
4494 	      if (NOTE_P (target) || BARRIER_P (target) || LABEL_P (target))
4495 		continue;
4496 
4497 	      if (JUMP_TABLE_DATA_P (target))
4498 		continue;
4499 
4500 	      pat = PATTERN (target);
4501 	      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
4502 		  || GET_CODE (pat) == ASM_INPUT
4503 		  || asm_noperands (pat) >= 0)
4504 		continue;
4505 
4506 	      if (NONDEBUG_INSN_P (target))
4507 		{
4508 		  rtx_insn *load_insn = find_load (target);
4509 		  enum attr_type type = type_for_anomaly (target);
4510 		  int delay_needed = 0;
4511 		  if (cycles_since_jump < INT_MAX)
4512 		    cycles_since_jump++;
4513 
4514 		  if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
4515 		    {
4516 		      if (trapping_loads_p (load_insn, -1, false))
4517 			delay_needed = 2;
4518 		    }
4519 		  else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
4520 		    delay_needed = 2;
4521 
4522 		  if (delay_needed > cycles_since_jump)
4523 		    {
4524 		      rtx prev = prev_real_insn (label);
4525 		      delay_needed -= cycles_since_jump;
4526 		      if (dump_file)
4527 			fprintf (dump_file, "Adding %d nops after %d\n",
4528 				 delay_needed, INSN_UID (label));
4529 		      if (JUMP_P (prev)
4530 			  && INSN_CODE (prev) == CODE_FOR_cbranch_with_nops)
4531 			{
4532 			  rtx x;
4533 			  HOST_WIDE_INT v;
4534 
4535 			  if (dump_file)
4536 			    fprintf (dump_file,
4537 				     "Reducing nops on insn %d.\n",
4538 				     INSN_UID (prev));
4539 			  x = PATTERN (prev);
4540 			  x = XVECEXP (x, 0, 1);
4541 			  v = INTVAL (XVECEXP (x, 0, 0)) - delay_needed;
4542 			  XVECEXP (x, 0, 0) = GEN_INT (v);
4543 			}
4544 		      while (delay_needed-- > 0)
4545 			emit_insn_after (gen_nop (), label);
4546 		      break;
4547 		    }
4548 		}
4549 	    }
4550 	}
4551     }
4552 }
4553 
4554 /* Called just before the final scheduling pass.  If we need to insert NOPs
4555    later on to work around speculative loads, insert special placeholder
4556    insns that cause loads to be delayed for as many cycles as necessary
4557    (and possible).  This reduces the number of NOPs we need to add.
4558    The dummy insns we generate are later removed by bfin_gen_bundles.  */
4559 static void
4560 add_sched_insns_for_speculation (void)
4561 {
4562   rtx_insn *insn;
4563 
4564   if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
4565       && ! ENABLE_WA_INDIRECT_CALLS)
4566     return;
4567 
4568   /* First pass: add a placeholder stall insn after each predicted-false
4569      branch, so the scheduler keeps subsequent loads away from it.  */
4570   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4571     {
4572       rtx pat;
4573 
4574       if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
4575 	continue;
4576       if (JUMP_TABLE_DATA_P (insn))
4577 	continue;
4578 
4579       pat = PATTERN (insn);
4580       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
4581 	  || GET_CODE (pat) == ASM_INPUT
4582 	  || asm_noperands (pat) >= 0)
4583 	continue;
4584 
4585       if (JUMP_P (insn))
4586 	{
4587 	  if (any_condjump_p (insn)
4588 	      && !cbranch_predicted_taken_p (insn))
4589 	    {
4590 	      rtx n = next_real_insn (insn);
4591 	      emit_insn_before (gen_stall (GEN_INT (3)), n);
4592 	    }
4593 	}
4594     }
4595 
4596   /* Second pass: add a placeholder stall insn at the destination of each
4597      predicted-true branch, unless one is already present.  */
4598   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4599     {
4600       if (JUMP_P (insn)
4601 	  && any_condjump_p (insn)
4602 	  && (cbranch_predicted_taken_p (insn)))
4603 	{
4604 	  rtx target = JUMP_LABEL (insn);
4605 	  rtx_insn *next = next_real_insn (target);
4606 
4607 	  if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE
4608 	      && get_attr_type (next) == TYPE_STALL)
4609 	    continue;
4610 	  emit_insn_before (gen_stall (GEN_INT (1)), next);
4611 	}
4612     }
4613 }
4614 
4615 /* We use the machine specific reorg pass for emitting CSYNC instructions
4616    after conditional branches as needed.
4617 
4618    The Blackfin is unusual in that a code sequence like
4619      if cc jump label
4620      r0 = (p0)
4621    may speculatively perform the load even if the condition isn't true.  This
4622    happens for a branch that is predicted not taken, because the pipeline
4623    isn't flushed or stalled, so the early stages of the following instructions,
4624    which perform the memory reference, are allowed to execute before the
4625    jump condition is evaluated.
4626    Therefore, we must insert additional instructions in all places where this
4627    could lead to incorrect behavior.  The manual recommends CSYNC, while
4628    VDSP seems to use NOPs (even though its corresponding compiler option is
4629    named CSYNC).
4630 
4631    When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
4632    When optimizing for size, we turn the branch into a predicted taken one.
4633    This may be slower due to mispredicts, but saves code size.  */
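/* As a hedged sketch (schematic, mirroring the notation above), the
   problematic sequence is rewritten when optimizing for speed as

     if cc jump label
     nop; nop; nop;	// enough fill for the load to leave the shadow
     r0 = (p0)

   and when optimizing for size as a predicted-taken branch:

     if cc jump label (bp)
     r0 = (p0)  */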
4634 
4635 static void
4636 bfin_reorg (void)
4637 {
4638   /* We are freeing block_for_insn in the toplev to keep compatibility
4639      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
4640   compute_bb_for_insn ();
4641 
4642   if (flag_schedule_insns_after_reload)
4643     {
4644       splitting_for_sched = 1;
4645       split_all_insns ();
4646       splitting_for_sched = 0;
4647 
4648       add_sched_insns_for_speculation ();
4649 
4650       timevar_push (TV_SCHED2);
4651       if (flag_selective_scheduling2
4652 	  && !maybe_skip_selective_scheduling ())
4653         run_selective_scheduling ();
4654       else
4655 	schedule_insns ();
4656       timevar_pop (TV_SCHED2);
4657 
4658       /* Examine the schedule and insert nops as necessary for 64-bit parallel
4659 	 instructions.  */
4660       bfin_gen_bundles ();
4661     }
4662 
4663   df_analyze ();
4664 
4665   /* Doloop optimization */
4666   if (cfun->machine->has_hardware_loops)
4667     bfin_reorg_loops ();
4668 
4669   workaround_speculation ();
4670 
4671   if (flag_var_tracking)
4672     {
4673       timevar_push (TV_VAR_TRACKING);
4674       variable_tracking_main ();
4675       reorder_var_tracking_notes ();
4676       timevar_pop (TV_VAR_TRACKING);
4677     }
4678 
4679   df_finish_pass (false);
4680 
4681   workaround_rts_anomaly ();
4682 }
4683 
4684 /* Handle interrupt_handler, exception_handler and nmi_handler function
4685    attributes; arguments as in struct attribute_spec.handler.  */
4686 
4687 static tree
4688 handle_int_attribute (tree *node, tree name,
4689 		      tree args ATTRIBUTE_UNUSED,
4690 		      int flags ATTRIBUTE_UNUSED,
4691 		      bool *no_add_attrs)
4692 {
4693   tree x = *node;
4694   if (TREE_CODE (x) == FUNCTION_DECL)
4695     x = TREE_TYPE (x);
4696 
4697   if (TREE_CODE (x) != FUNCTION_TYPE)
4698     {
4699       warning (OPT_Wattributes, "%qE attribute only applies to functions",
4700 	       name);
4701       *no_add_attrs = true;
4702     }
4703   else if (funkind (x) != SUBROUTINE)
4704     error ("multiple function type attributes specified");
4705 
4706   return NULL_TREE;
4707 }
4708 
4709 /* Return 0 if the attributes for two types are incompatible, 1 if they
4710    are compatible, and 2 if they are nearly compatible (which causes a
4711    warning to be generated).  */
4712 
4713 static int
4714 bfin_comp_type_attributes (const_tree type1, const_tree type2)
4715 {
4716   e_funkind kind1, kind2;
4717 
4718   if (TREE_CODE (type1) != FUNCTION_TYPE)
4719     return 1;
4720 
4721   kind1 = funkind (type1);
4722   kind2 = funkind (type2);
4723 
4724   if (kind1 != kind2)
4725     return 0;
4726 
4727   /* Check for mismatched modifiers.  */
4728   if (!lookup_attribute ("nesting", TYPE_ATTRIBUTES (type1))
4729       != !lookup_attribute ("nesting", TYPE_ATTRIBUTES (type2)))
4730     return 0;
4731 
4732   if (!lookup_attribute ("saveall", TYPE_ATTRIBUTES (type1))
4733       != !lookup_attribute ("saveall", TYPE_ATTRIBUTES (type2)))
4734     return 0;
4735 
4736   if (!lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type1))
4737       != !lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type2)))
4738     return 0;
4739 
4740   if (!lookup_attribute ("longcall", TYPE_ATTRIBUTES (type1))
4741       != !lookup_attribute ("longcall", TYPE_ATTRIBUTES (type2)))
4742     return 0;
4743 
4744   return 1;
4745 }
4746 
4747 /* Handle a "longcall" or "shortcall" attribute; arguments as in
4748    struct attribute_spec.handler.  */
4749 
4750 static tree
4751 bfin_handle_longcall_attribute (tree *node, tree name,
4752 				tree args ATTRIBUTE_UNUSED,
4753 				int flags ATTRIBUTE_UNUSED,
4754 				bool *no_add_attrs)
4755 {
4756   if (TREE_CODE (*node) != FUNCTION_TYPE
4757       && TREE_CODE (*node) != FIELD_DECL
4758       && TREE_CODE (*node) != TYPE_DECL)
4759     {
4760       warning (OPT_Wattributes, "%qE attribute only applies to functions",
4761 	       name);
4762       *no_add_attrs = true;
4763     }
4764 
4765   if ((strcmp (IDENTIFIER_POINTER (name), "longcall") == 0
4766        && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (*node)))
4767       || (strcmp (IDENTIFIER_POINTER (name), "shortcall") == 0
4768 	  && lookup_attribute ("longcall", TYPE_ATTRIBUTES (*node))))
4769     {
4770       warning (OPT_Wattributes,
4771 	       "can%'t apply both longcall and shortcall attributes to the same function");
4772       *no_add_attrs = true;
4773     }
4774 
4775   return NULL_TREE;
4776 }
4777 
4778 /* Handle a "l1_text" attribute; arguments as in
4779    struct attribute_spec.handler.  */
4780 
4781 static tree
4782 bfin_handle_l1_text_attribute (tree *node, tree name, tree ARG_UNUSED (args),
4783 			       int ARG_UNUSED (flags), bool *no_add_attrs)
4784 {
4785   tree decl = *node;
4786 
4787   if (TREE_CODE (decl) != FUNCTION_DECL)
4788     {
4789       error ("%qE attribute only applies to functions",
4790 	     name);
4791       *no_add_attrs = true;
4792     }
4793 
4794   /* The decl may have already been given a section attribute
4795      from a previous declaration. Ensure they match.  */
4796   else if (DECL_SECTION_NAME (decl) != NULL
4797 	   && strcmp (DECL_SECTION_NAME (decl),
4798 		      ".l1.text") != 0)
4799     {
4800       error ("section of %q+D conflicts with previous declaration",
4801 	     decl);
4802       *no_add_attrs = true;
4803     }
4804   else
4805     set_decl_section_name (decl, ".l1.text");
4806 
4807   return NULL_TREE;
4808 }
4809 
4810 /* Handle a "l1_data", "l1_data_A" or "l1_data_B" attribute;
4811    arguments as in struct attribute_spec.handler.  */
4812 
4813 static tree
4814 bfin_handle_l1_data_attribute (tree *node, tree name, tree ARG_UNUSED (args),
4815 			       int ARG_UNUSED (flags), bool *no_add_attrs)
4816 {
4817   tree decl = *node;
4818 
4819   if (TREE_CODE (decl) != VAR_DECL)
4820     {
4821       error ("%qE attribute only applies to variables",
4822 	     name);
4823       *no_add_attrs = true;
4824     }
4825   else if (current_function_decl != NULL_TREE
4826 	   && !TREE_STATIC (decl))
4827     {
4828       error ("%qE attribute cannot be specified for local variables",
4829 	     name);
4830       *no_add_attrs = true;
4831     }
4832   else
4833     {
4834       const char *section_name;
4835 
4836       if (strcmp (IDENTIFIER_POINTER (name), "l1_data") == 0)
4837 	section_name = ".l1.data";
4838       else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_A") == 0)
4839 	section_name = ".l1.data.A";
4840       else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_B") == 0)
4841 	section_name = ".l1.data.B";
4842       else
4843 	gcc_unreachable ();
4844 
4845       /* The decl may have already been given a section attribute
4846 	 from a previous declaration. Ensure they match.  */
4847       if (DECL_SECTION_NAME (decl) != NULL
4848 	  && strcmp (DECL_SECTION_NAME (decl),
4849 		     section_name) != 0)
4850 	{
4851 	  error ("section of %q+D conflicts with previous declaration",
4852 		 decl);
4853 	  *no_add_attrs = true;
4854 	}
4855       else
4856 	set_decl_section_name (decl, section_name);
4857     }
4858 
4859   return NULL_TREE;
4860 }
4861 
4862 /* Handle a "l2" attribute; arguments as in struct attribute_spec.handler.  */
4863 
4864 static tree
4865 bfin_handle_l2_attribute (tree *node, tree ARG_UNUSED (name),
4866 			  tree ARG_UNUSED (args), int ARG_UNUSED (flags),
4867 			  bool *no_add_attrs)
4868 {
4869   tree decl = *node;
4870 
4871   if (TREE_CODE (decl) == FUNCTION_DECL)
4872     {
4873       if (DECL_SECTION_NAME (decl) != NULL
4874 	  && strcmp (DECL_SECTION_NAME (decl),
4875 		     ".l2.text") != 0)
4876 	{
4877 	  error ("section of %q+D conflicts with previous declaration",
4878 		 decl);
4879 	  *no_add_attrs = true;
4880 	}
4881       else
4882 	set_decl_section_name (decl, ".l2.text");
4883     }
4884   else if (TREE_CODE (decl) == VAR_DECL)
4885     {
4886       if (DECL_SECTION_NAME (decl) != NULL
4887 	  && strcmp (DECL_SECTION_NAME (decl),
4888 		     ".l2.data") != 0)
4889 	{
4890 	  error ("section of %q+D conflicts with previous declaration",
4891 		 decl);
4892 	  *no_add_attrs = true;
4893 	}
4894       else
4895 	set_decl_section_name (decl, ".l2.data");
4896     }
4897 
4898   return NULL_TREE;
4899 }
4900 
4901 /* Table of valid machine attributes.  */
4902 static const struct attribute_spec bfin_attribute_table[] =
4903 {
4904   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
4905        affects_type_identity } */
4906   { "interrupt_handler", 0, 0, false, true,  true, handle_int_attribute,
4907     false },
4908   { "exception_handler", 0, 0, false, true,  true, handle_int_attribute,
4909     false },
4910   { "nmi_handler", 0, 0, false, true,  true, handle_int_attribute, false },
4911   { "nesting", 0, 0, false, true,  true, NULL, false },
4912   { "kspisusp", 0, 0, false, true,  true, NULL, false },
4913   { "saveall", 0, 0, false, true,  true, NULL, false },
4914   { "longcall",  0, 0, false, true,  true,  bfin_handle_longcall_attribute,
4915     false },
4916   { "shortcall", 0, 0, false, true,  true,  bfin_handle_longcall_attribute,
4917     false },
4918   { "l1_text", 0, 0, true, false, false,  bfin_handle_l1_text_attribute,
4919     false },
4920   { "l1_data", 0, 0, true, false, false,  bfin_handle_l1_data_attribute,
4921     false },
4922   { "l1_data_A", 0, 0, true, false, false, bfin_handle_l1_data_attribute,
4923     false },
4924   { "l1_data_B", 0, 0, true, false, false,  bfin_handle_l1_data_attribute,
4925     false },
4926   { "l2", 0, 0, true, false, false,  bfin_handle_l2_attribute, false },
4927   { NULL, 0, 0, false, false, false, NULL, false }
4928 };
4929 
4930 /* Implementation of TARGET_ASM_INTEGER.  When using FD-PIC, we need to
4931    tell the assembler to generate pointers to function descriptors in
4932    some cases.  */
4933 
4934 static bool
4935 bfin_assemble_integer (rtx value, unsigned int size, int aligned_p)
4936 {
4937   if (TARGET_FDPIC && size == UNITS_PER_WORD)
4938     {
4939       if (GET_CODE (value) == SYMBOL_REF
4940 	  && SYMBOL_REF_FUNCTION_P (value))
4941 	{
4942 	  fputs ("\t.picptr\tfuncdesc(", asm_out_file);
4943 	  output_addr_const (asm_out_file, value);
4944 	  fputs (")\n", asm_out_file);
4945 	  return true;
4946 	}
4947       if (!aligned_p)
4948 	{
4949 	  /* We've set the unaligned SI op to NULL, so we always have to
4950 	     handle the unaligned case here.  */
4951 	  assemble_integer_with_op ("\t.4byte\t", value);
4952 	  return true;
4953 	}
4954     }
4955   return default_assemble_integer (value, size, aligned_p);
4956 }
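/* For illustration (hypothetical user code, not from this file): with
   FD-PIC, a statically initialized function pointer such as

	void (*fp) (void) = foo;

   is assembled roughly as ".picptr funcdesc(_foo)", so that the linker
   builds a function descriptor rather than a raw code address.  */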
4957 
4958 /* Output the assembler code for a thunk function.  THUNK_DECL is the
4959    declaration for the thunk function itself, FUNCTION is the decl for
4960    the target function.  DELTA is an immediate constant offset to be
4961    added to THIS.  If VCALL_OFFSET is nonzero, the word at
4962    *(*this + vcall_offset) should be added to THIS.  */
4963 
4964 static void
4965 bfin_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
4966 		      tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
4967 		      HOST_WIDE_INT vcall_offset, tree function)
4968 {
4969   rtx xops[3];
4970   /* The this parameter is passed as the first argument.  */
4971   rtx this_rtx = gen_rtx_REG (Pmode, REG_R0);
4972 
4973   /* Adjust the this parameter by a fixed constant.  */
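  /* The add-immediate form used here takes a 7-bit signed immediate
     (-64..63), hence the range checks below; nearby out-of-range deltas
     are split into two adds, e.g. (an illustrative value) delta == 100
     becomes "%1 += 63; %1 += 37;", and anything outside [-128, 126] is
     materialized in R3 first.  */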
4974   if (delta)
4975     {
4976       xops[1] = this_rtx;
4977       if (delta >= -64 && delta <= 63)
4978 	{
4979 	  xops[0] = GEN_INT (delta);
4980 	  output_asm_insn ("%1 += %0;", xops);
4981 	}
4982       else if (delta >= -128 && delta < -64)
4983 	{
4984 	  xops[0] = GEN_INT (delta + 64);
4985 	  output_asm_insn ("%1 += -64; %1 += %0;", xops);
4986 	}
4987       else if (delta > 63 && delta <= 126)
4988 	{
4989 	  xops[0] = GEN_INT (delta - 63);
4990 	  output_asm_insn ("%1 += 63; %1 += %0;", xops);
4991 	}
4992       else
4993 	{
4994 	  xops[0] = GEN_INT (delta);
4995 	  output_asm_insn ("r3.l = %h0; r3.h = %d0; %1 = %1 + r3;", xops);
4996 	}
4997     }
4998 
4999   /* Adjust the this parameter by a value stored in the vtable.  */
5000   if (vcall_offset)
5001     {
5002       rtx p2tmp = gen_rtx_REG (Pmode, REG_P2);
5003       rtx tmp = gen_rtx_REG (Pmode, REG_R3);
5004 
5005       xops[1] = tmp;
5006       xops[2] = p2tmp;
5007       output_asm_insn ("%2 = r0; %2 = [%2];", xops);
5008 
5009       /* Adjust the this parameter.  */
5010       xops[0] = gen_rtx_MEM (Pmode, plus_constant (Pmode, p2tmp,
5011 						   vcall_offset));
5012       if (!memory_operand (xops[0], Pmode))
5013 	{
5014 	  rtx tmp2 = gen_rtx_REG (Pmode, REG_P1);
5015 	  xops[0] = GEN_INT (vcall_offset);
5016 	  xops[1] = tmp2;
5017 	  output_asm_insn ("%h1 = %h0; %d1 = %d0; %2 = %2 + %1", xops);
5018 	  xops[0] = gen_rtx_MEM (Pmode, p2tmp);
5019 	}
5020       xops[2] = this_rtx;
5021       output_asm_insn ("%1 = %0; %2 = %2 + %1;", xops);
5022     }
5023 
5024   xops[0] = XEXP (DECL_RTL (function), 0);
5025   if (1 || !flag_pic || (*targetm.binds_local_p) (function))
5026     output_asm_insn ("jump.l\t%P0", xops);
5027 }
5028 
5029 /* Codes for all the Blackfin builtins.  */
5030 enum bfin_builtins
5031 {
5032   BFIN_BUILTIN_CSYNC,
5033   BFIN_BUILTIN_SSYNC,
5034   BFIN_BUILTIN_ONES,
5035   BFIN_BUILTIN_COMPOSE_2X16,
5036   BFIN_BUILTIN_EXTRACTLO,
5037   BFIN_BUILTIN_EXTRACTHI,
5038 
5039   BFIN_BUILTIN_SSADD_2X16,
5040   BFIN_BUILTIN_SSSUB_2X16,
5041   BFIN_BUILTIN_SSADDSUB_2X16,
5042   BFIN_BUILTIN_SSSUBADD_2X16,
5043   BFIN_BUILTIN_MULT_2X16,
5044   BFIN_BUILTIN_MULTR_2X16,
5045   BFIN_BUILTIN_NEG_2X16,
5046   BFIN_BUILTIN_ABS_2X16,
5047   BFIN_BUILTIN_MIN_2X16,
5048   BFIN_BUILTIN_MAX_2X16,
5049 
5050   BFIN_BUILTIN_SSADD_1X16,
5051   BFIN_BUILTIN_SSSUB_1X16,
5052   BFIN_BUILTIN_MULT_1X16,
5053   BFIN_BUILTIN_MULTR_1X16,
5054   BFIN_BUILTIN_NORM_1X16,
5055   BFIN_BUILTIN_NEG_1X16,
5056   BFIN_BUILTIN_ABS_1X16,
5057   BFIN_BUILTIN_MIN_1X16,
5058   BFIN_BUILTIN_MAX_1X16,
5059 
5060   BFIN_BUILTIN_SUM_2X16,
5061   BFIN_BUILTIN_DIFFHL_2X16,
5062   BFIN_BUILTIN_DIFFLH_2X16,
5063 
5064   BFIN_BUILTIN_SSADD_1X32,
5065   BFIN_BUILTIN_SSSUB_1X32,
5066   BFIN_BUILTIN_NORM_1X32,
5067   BFIN_BUILTIN_ROUND_1X32,
5068   BFIN_BUILTIN_NEG_1X32,
5069   BFIN_BUILTIN_ABS_1X32,
5070   BFIN_BUILTIN_MIN_1X32,
5071   BFIN_BUILTIN_MAX_1X32,
5072   BFIN_BUILTIN_MULT_1X32,
5073   BFIN_BUILTIN_MULT_1X32X32,
5074   BFIN_BUILTIN_MULT_1X32X32NS,
5075 
5076   BFIN_BUILTIN_MULHISILL,
5077   BFIN_BUILTIN_MULHISILH,
5078   BFIN_BUILTIN_MULHISIHL,
5079   BFIN_BUILTIN_MULHISIHH,
5080 
5081   BFIN_BUILTIN_LSHIFT_1X16,
5082   BFIN_BUILTIN_LSHIFT_2X16,
5083   BFIN_BUILTIN_SSASHIFT_1X16,
5084   BFIN_BUILTIN_SSASHIFT_2X16,
5085   BFIN_BUILTIN_SSASHIFT_1X32,
5086 
5087   BFIN_BUILTIN_CPLX_MUL_16,
5088   BFIN_BUILTIN_CPLX_MAC_16,
5089   BFIN_BUILTIN_CPLX_MSU_16,
5090 
5091   BFIN_BUILTIN_CPLX_MUL_16_S40,
5092   BFIN_BUILTIN_CPLX_MAC_16_S40,
5093   BFIN_BUILTIN_CPLX_MSU_16_S40,
5094 
5095   BFIN_BUILTIN_CPLX_SQU,
5096 
5097   BFIN_BUILTIN_LOADBYTES,
5098 
5099   BFIN_BUILTIN_MAX
5100 };
5101 
5102 #define def_builtin(NAME, TYPE, CODE)					\
5103 do {									\
5104   add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,		\
5105 		       NULL, NULL_TREE);				\
5106 } while (0)
5107 
5108 /* Set up all builtin functions for this target.  */
5109 static void
5110 bfin_init_builtins (void)
5111 {
5112   tree V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
5113   tree void_ftype_void
5114     = build_function_type_list (void_type_node, NULL_TREE);
5115   tree short_ftype_short
5116     = build_function_type_list (short_integer_type_node, short_integer_type_node,
5117 				NULL_TREE);
5118   tree short_ftype_int_int
5119     = build_function_type_list (short_integer_type_node, integer_type_node,
5120 				integer_type_node, NULL_TREE);
5121   tree int_ftype_int_int
5122     = build_function_type_list (integer_type_node, integer_type_node,
5123 				integer_type_node, NULL_TREE);
5124   tree int_ftype_int
5125     = build_function_type_list (integer_type_node, integer_type_node,
5126 				NULL_TREE);
5127   tree short_ftype_int
5128     = build_function_type_list (short_integer_type_node, integer_type_node,
5129 				NULL_TREE);
5130   tree int_ftype_v2hi_v2hi
5131     = build_function_type_list (integer_type_node, V2HI_type_node,
5132 				V2HI_type_node, NULL_TREE);
5133   tree v2hi_ftype_v2hi_v2hi
5134     = build_function_type_list (V2HI_type_node, V2HI_type_node,
5135 				V2HI_type_node, NULL_TREE);
5136   tree v2hi_ftype_v2hi_v2hi_v2hi
5137     = build_function_type_list (V2HI_type_node, V2HI_type_node,
5138 				V2HI_type_node, V2HI_type_node, NULL_TREE);
5139   tree v2hi_ftype_int_int
5140     = build_function_type_list (V2HI_type_node, integer_type_node,
5141 				integer_type_node, NULL_TREE);
5142   tree v2hi_ftype_v2hi_int
5143     = build_function_type_list (V2HI_type_node, V2HI_type_node,
5144 				integer_type_node, NULL_TREE);
5145   tree int_ftype_short_short
5146     = build_function_type_list (integer_type_node, short_integer_type_node,
5147 				short_integer_type_node, NULL_TREE);
5148   tree v2hi_ftype_v2hi
5149     = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE);
5150   tree short_ftype_v2hi
5151     = build_function_type_list (short_integer_type_node, V2HI_type_node,
5152 				NULL_TREE);
5153   tree int_ftype_pint
5154     = build_function_type_list (integer_type_node,
5155 				build_pointer_type (integer_type_node),
5156 				NULL_TREE);
5157 
5158   /* Define the Blackfin builtins, starting with the sync operations.  */
5159   def_builtin ("__builtin_bfin_csync", void_ftype_void, BFIN_BUILTIN_CSYNC);
5160   def_builtin ("__builtin_bfin_ssync", void_ftype_void, BFIN_BUILTIN_SSYNC);
5161 
5162   def_builtin ("__builtin_bfin_ones", short_ftype_int, BFIN_BUILTIN_ONES);
5163 
5164   def_builtin ("__builtin_bfin_compose_2x16", v2hi_ftype_int_int,
5165 	       BFIN_BUILTIN_COMPOSE_2X16);
5166   def_builtin ("__builtin_bfin_extract_hi", short_ftype_v2hi,
5167 	       BFIN_BUILTIN_EXTRACTHI);
5168   def_builtin ("__builtin_bfin_extract_lo", short_ftype_v2hi,
5169 	       BFIN_BUILTIN_EXTRACTLO);
5170 
5171   def_builtin ("__builtin_bfin_min_fr2x16", v2hi_ftype_v2hi_v2hi,
5172 	       BFIN_BUILTIN_MIN_2X16);
5173   def_builtin ("__builtin_bfin_max_fr2x16", v2hi_ftype_v2hi_v2hi,
5174 	       BFIN_BUILTIN_MAX_2X16);
5175 
5176   def_builtin ("__builtin_bfin_add_fr2x16", v2hi_ftype_v2hi_v2hi,
5177 	       BFIN_BUILTIN_SSADD_2X16);
5178   def_builtin ("__builtin_bfin_sub_fr2x16", v2hi_ftype_v2hi_v2hi,
5179 	       BFIN_BUILTIN_SSSUB_2X16);
5180   def_builtin ("__builtin_bfin_dspaddsubsat", v2hi_ftype_v2hi_v2hi,
5181 	       BFIN_BUILTIN_SSADDSUB_2X16);
5182   def_builtin ("__builtin_bfin_dspsubaddsat", v2hi_ftype_v2hi_v2hi,
5183 	       BFIN_BUILTIN_SSSUBADD_2X16);
5184   def_builtin ("__builtin_bfin_mult_fr2x16", v2hi_ftype_v2hi_v2hi,
5185 	       BFIN_BUILTIN_MULT_2X16);
5186   def_builtin ("__builtin_bfin_multr_fr2x16", v2hi_ftype_v2hi_v2hi,
5187 	       BFIN_BUILTIN_MULTR_2X16);
5188   def_builtin ("__builtin_bfin_negate_fr2x16", v2hi_ftype_v2hi,
5189 	       BFIN_BUILTIN_NEG_2X16);
5190   def_builtin ("__builtin_bfin_abs_fr2x16", v2hi_ftype_v2hi,
5191 	       BFIN_BUILTIN_ABS_2X16);
5192 
5193   def_builtin ("__builtin_bfin_min_fr1x16", short_ftype_int_int,
5194 	       BFIN_BUILTIN_MIN_1X16);
5195   def_builtin ("__builtin_bfin_max_fr1x16", short_ftype_int_int,
5196 	       BFIN_BUILTIN_MAX_1X16);
5197 
5198   def_builtin ("__builtin_bfin_add_fr1x16", short_ftype_int_int,
5199 	       BFIN_BUILTIN_SSADD_1X16);
5200   def_builtin ("__builtin_bfin_sub_fr1x16", short_ftype_int_int,
5201 	       BFIN_BUILTIN_SSSUB_1X16);
5202   def_builtin ("__builtin_bfin_mult_fr1x16", short_ftype_int_int,
5203 	       BFIN_BUILTIN_MULT_1X16);
5204   def_builtin ("__builtin_bfin_multr_fr1x16", short_ftype_int_int,
5205 	       BFIN_BUILTIN_MULTR_1X16);
5206   def_builtin ("__builtin_bfin_negate_fr1x16", short_ftype_short,
5207 	       BFIN_BUILTIN_NEG_1X16);
5208   def_builtin ("__builtin_bfin_abs_fr1x16", short_ftype_short,
5209 	       BFIN_BUILTIN_ABS_1X16);
5210   def_builtin ("__builtin_bfin_norm_fr1x16", short_ftype_int,
5211 	       BFIN_BUILTIN_NORM_1X16);
5212 
5213   def_builtin ("__builtin_bfin_sum_fr2x16", short_ftype_v2hi,
5214 	       BFIN_BUILTIN_SUM_2X16);
5215   def_builtin ("__builtin_bfin_diff_hl_fr2x16", short_ftype_v2hi,
5216 	       BFIN_BUILTIN_DIFFHL_2X16);
5217   def_builtin ("__builtin_bfin_diff_lh_fr2x16", short_ftype_v2hi,
5218 	       BFIN_BUILTIN_DIFFLH_2X16);
5219 
5220   def_builtin ("__builtin_bfin_mulhisill", int_ftype_v2hi_v2hi,
5221 	       BFIN_BUILTIN_MULHISILL);
5222   def_builtin ("__builtin_bfin_mulhisihl", int_ftype_v2hi_v2hi,
5223 	       BFIN_BUILTIN_MULHISIHL);
5224   def_builtin ("__builtin_bfin_mulhisilh", int_ftype_v2hi_v2hi,
5225 	       BFIN_BUILTIN_MULHISILH);
5226   def_builtin ("__builtin_bfin_mulhisihh", int_ftype_v2hi_v2hi,
5227 	       BFIN_BUILTIN_MULHISIHH);
5228 
5229   def_builtin ("__builtin_bfin_min_fr1x32", int_ftype_int_int,
5230 	       BFIN_BUILTIN_MIN_1X32);
5231   def_builtin ("__builtin_bfin_max_fr1x32", int_ftype_int_int,
5232 	       BFIN_BUILTIN_MAX_1X32);
5233 
5234   def_builtin ("__builtin_bfin_add_fr1x32", int_ftype_int_int,
5235 	       BFIN_BUILTIN_SSADD_1X32);
5236   def_builtin ("__builtin_bfin_sub_fr1x32", int_ftype_int_int,
5237 	       BFIN_BUILTIN_SSSUB_1X32);
5238   def_builtin ("__builtin_bfin_negate_fr1x32", int_ftype_int,
5239 	       BFIN_BUILTIN_NEG_1X32);
5240   def_builtin ("__builtin_bfin_abs_fr1x32", int_ftype_int,
5241 	       BFIN_BUILTIN_ABS_1X32);
5242   def_builtin ("__builtin_bfin_norm_fr1x32", short_ftype_int,
5243 	       BFIN_BUILTIN_NORM_1X32);
5244   def_builtin ("__builtin_bfin_round_fr1x32", short_ftype_int,
5245 	       BFIN_BUILTIN_ROUND_1X32);
5246   def_builtin ("__builtin_bfin_mult_fr1x32", int_ftype_short_short,
5247 	       BFIN_BUILTIN_MULT_1X32);
5248   def_builtin ("__builtin_bfin_mult_fr1x32x32", int_ftype_int_int,
5249 	       BFIN_BUILTIN_MULT_1X32X32);
5250   def_builtin ("__builtin_bfin_mult_fr1x32x32NS", int_ftype_int_int,
5251 	       BFIN_BUILTIN_MULT_1X32X32NS);
5252 
5253   /* Shifts.  */
5254   def_builtin ("__builtin_bfin_shl_fr1x16", short_ftype_int_int,
5255 	       BFIN_BUILTIN_SSASHIFT_1X16);
5256   def_builtin ("__builtin_bfin_shl_fr2x16", v2hi_ftype_v2hi_int,
5257 	       BFIN_BUILTIN_SSASHIFT_2X16);
5258   def_builtin ("__builtin_bfin_lshl_fr1x16", short_ftype_int_int,
5259 	       BFIN_BUILTIN_LSHIFT_1X16);
5260   def_builtin ("__builtin_bfin_lshl_fr2x16", v2hi_ftype_v2hi_int,
5261 	       BFIN_BUILTIN_LSHIFT_2X16);
5262   def_builtin ("__builtin_bfin_shl_fr1x32", int_ftype_int_int,
5263 	       BFIN_BUILTIN_SSASHIFT_1X32);
5264 
5265   /* Complex numbers.  */
5266   def_builtin ("__builtin_bfin_cmplx_add", v2hi_ftype_v2hi_v2hi,
5267 	       BFIN_BUILTIN_SSADD_2X16);
5268   def_builtin ("__builtin_bfin_cmplx_sub", v2hi_ftype_v2hi_v2hi,
5269 	       BFIN_BUILTIN_SSSUB_2X16);
5270   def_builtin ("__builtin_bfin_cmplx_mul", v2hi_ftype_v2hi_v2hi,
5271 	       BFIN_BUILTIN_CPLX_MUL_16);
5272   def_builtin ("__builtin_bfin_cmplx_mac", v2hi_ftype_v2hi_v2hi_v2hi,
5273 	       BFIN_BUILTIN_CPLX_MAC_16);
5274   def_builtin ("__builtin_bfin_cmplx_msu", v2hi_ftype_v2hi_v2hi_v2hi,
5275 	       BFIN_BUILTIN_CPLX_MSU_16);
5276   def_builtin ("__builtin_bfin_cmplx_mul_s40", v2hi_ftype_v2hi_v2hi,
5277 	       BFIN_BUILTIN_CPLX_MUL_16_S40);
5278   def_builtin ("__builtin_bfin_cmplx_mac_s40", v2hi_ftype_v2hi_v2hi_v2hi,
5279 	       BFIN_BUILTIN_CPLX_MAC_16_S40);
5280   def_builtin ("__builtin_bfin_cmplx_msu_s40", v2hi_ftype_v2hi_v2hi_v2hi,
5281 	       BFIN_BUILTIN_CPLX_MSU_16_S40);
5282   def_builtin ("__builtin_bfin_csqu_fr16", v2hi_ftype_v2hi,
5283 	       BFIN_BUILTIN_CPLX_SQU);
5284 
5285   /* "Unaligned" load.  */
5286   def_builtin ("__builtin_bfin_loadbytes", int_ftype_pint,
5287 	       BFIN_BUILTIN_LOADBYTES);
5288 
5289 }
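/* Illustrative usage sketch, not part of the compiler proper: assuming
   a two-halfword vector type matching V2HImode (the Blackfin headers
   provide their own name for it, e.g. fract2x16), user code can
   exercise the builtins defined above like this:

     typedef short v2hi __attribute__ ((vector_size (4)));

     v2hi scale_and_saturate (v2hi a, v2hi b)
     {
       v2hi sum = __builtin_bfin_add_fr2x16 (a, b);
       return __builtin_bfin_shl_fr2x16 (sum, 1);
     }

   The first call expands through BFIN_BUILTIN_SSADD_2X16, the second
   through BFIN_BUILTIN_SSASHIFT_2X16; both saturate on overflow.  */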
5290 
5291 
5292 struct builtin_description
5293 {
5294   const enum insn_code icode;		/* Insn pattern used for expansion.  */
5295   const char *const name;		/* User-visible name of the builtin.  */
5296   const enum bfin_builtins code;	/* Function code from enum bfin_builtins.  */
5297   int macflag;				/* MACFLAG_xxx constant, or -1 for a plain binop.  */
5298 };
5299 
5300 static const struct builtin_description bdesc_2arg[] =
5301 {
5302   { CODE_FOR_composev2hi, "__builtin_bfin_compose_2x16", BFIN_BUILTIN_COMPOSE_2X16, -1 },
5303 
5304   { CODE_FOR_ssashiftv2hi3, "__builtin_bfin_shl_fr2x16", BFIN_BUILTIN_SSASHIFT_2X16, -1 },
5305   { CODE_FOR_ssashifthi3, "__builtin_bfin_shl_fr1x16", BFIN_BUILTIN_SSASHIFT_1X16, -1 },
5306   { CODE_FOR_lshiftv2hi3, "__builtin_bfin_lshl_fr2x16", BFIN_BUILTIN_LSHIFT_2X16, -1 },
5307   { CODE_FOR_lshifthi3, "__builtin_bfin_lshl_fr1x16", BFIN_BUILTIN_LSHIFT_1X16, -1 },
5308   { CODE_FOR_ssashiftsi3, "__builtin_bfin_shl_fr1x32", BFIN_BUILTIN_SSASHIFT_1X32, -1 },
5309 
5310   { CODE_FOR_sminhi3, "__builtin_bfin_min_fr1x16", BFIN_BUILTIN_MIN_1X16, -1 },
5311   { CODE_FOR_smaxhi3, "__builtin_bfin_max_fr1x16", BFIN_BUILTIN_MAX_1X16, -1 },
5312   { CODE_FOR_ssaddhi3, "__builtin_bfin_add_fr1x16", BFIN_BUILTIN_SSADD_1X16, -1 },
5313   { CODE_FOR_sssubhi3, "__builtin_bfin_sub_fr1x16", BFIN_BUILTIN_SSSUB_1X16, -1 },
5314 
5315   { CODE_FOR_sminsi3, "__builtin_bfin_min_fr1x32", BFIN_BUILTIN_MIN_1X32, -1 },
5316   { CODE_FOR_smaxsi3, "__builtin_bfin_max_fr1x32", BFIN_BUILTIN_MAX_1X32, -1 },
5317   { CODE_FOR_ssaddsi3, "__builtin_bfin_add_fr1x32", BFIN_BUILTIN_SSADD_1X32, -1 },
5318   { CODE_FOR_sssubsi3, "__builtin_bfin_sub_fr1x32", BFIN_BUILTIN_SSSUB_1X32, -1 },
5319 
5320   { CODE_FOR_sminv2hi3, "__builtin_bfin_min_fr2x16", BFIN_BUILTIN_MIN_2X16, -1 },
5321   { CODE_FOR_smaxv2hi3, "__builtin_bfin_max_fr2x16", BFIN_BUILTIN_MAX_2X16, -1 },
5322   { CODE_FOR_ssaddv2hi3, "__builtin_bfin_add_fr2x16", BFIN_BUILTIN_SSADD_2X16, -1 },
5323   { CODE_FOR_sssubv2hi3, "__builtin_bfin_sub_fr2x16", BFIN_BUILTIN_SSSUB_2X16, -1 },
5324   { CODE_FOR_ssaddsubv2hi3, "__builtin_bfin_dspaddsubsat", BFIN_BUILTIN_SSADDSUB_2X16, -1 },
5325   { CODE_FOR_sssubaddv2hi3, "__builtin_bfin_dspsubaddsat", BFIN_BUILTIN_SSSUBADD_2X16, -1 },
5326 
5327   { CODE_FOR_flag_mulhisi, "__builtin_bfin_mult_fr1x32", BFIN_BUILTIN_MULT_1X32, MACFLAG_NONE },
5328   { CODE_FOR_flag_mulhi, "__builtin_bfin_mult_fr1x16", BFIN_BUILTIN_MULT_1X16, MACFLAG_T },
5329   { CODE_FOR_flag_mulhi, "__builtin_bfin_multr_fr1x16", BFIN_BUILTIN_MULTR_1X16, MACFLAG_NONE },
5330   { CODE_FOR_flag_mulv2hi, "__builtin_bfin_mult_fr2x16", BFIN_BUILTIN_MULT_2X16, MACFLAG_T },
5331   { CODE_FOR_flag_mulv2hi, "__builtin_bfin_multr_fr2x16", BFIN_BUILTIN_MULTR_2X16, MACFLAG_NONE },
5332 
5333   { CODE_FOR_mulhisi_ll, "__builtin_bfin_mulhisill", BFIN_BUILTIN_MULHISILL, -1 },
5334   { CODE_FOR_mulhisi_lh, "__builtin_bfin_mulhisilh", BFIN_BUILTIN_MULHISILH, -1 },
5335   { CODE_FOR_mulhisi_hl, "__builtin_bfin_mulhisihl", BFIN_BUILTIN_MULHISIHL, -1 },
5336   { CODE_FOR_mulhisi_hh, "__builtin_bfin_mulhisihh", BFIN_BUILTIN_MULHISIHH, -1 }
5337 
5338 };
5339 
5340 static const struct builtin_description bdesc_1arg[] =
5341 {
5342   { CODE_FOR_loadbytes, "__builtin_bfin_loadbytes", BFIN_BUILTIN_LOADBYTES, 0 },
5343 
5344   { CODE_FOR_ones, "__builtin_bfin_ones", BFIN_BUILTIN_ONES, 0 },
5345 
5346   { CODE_FOR_clrsbhi2, "__builtin_bfin_norm_fr1x16", BFIN_BUILTIN_NORM_1X16, 0 },
5347   { CODE_FOR_ssneghi2, "__builtin_bfin_negate_fr1x16", BFIN_BUILTIN_NEG_1X16, 0 },
5348   { CODE_FOR_abshi2, "__builtin_bfin_abs_fr1x16", BFIN_BUILTIN_ABS_1X16, 0 },
5349 
5350   { CODE_FOR_clrsbsi2, "__builtin_bfin_norm_fr1x32", BFIN_BUILTIN_NORM_1X32, 0 },
5351   { CODE_FOR_ssroundsi2, "__builtin_bfin_round_fr1x32", BFIN_BUILTIN_ROUND_1X32, 0 },
5352   { CODE_FOR_ssnegsi2, "__builtin_bfin_negate_fr1x32", BFIN_BUILTIN_NEG_1X32, 0 },
5353   { CODE_FOR_ssabssi2, "__builtin_bfin_abs_fr1x32", BFIN_BUILTIN_ABS_1X32, 0 },
5354 
5355   { CODE_FOR_movv2hi_hi_low, "__builtin_bfin_extract_lo", BFIN_BUILTIN_EXTRACTLO, 0 },
5356   { CODE_FOR_movv2hi_hi_high, "__builtin_bfin_extract_hi", BFIN_BUILTIN_EXTRACTHI, 0 },
5357   { CODE_FOR_ssnegv2hi2, "__builtin_bfin_negate_fr2x16", BFIN_BUILTIN_NEG_2X16, 0 },
5358   { CODE_FOR_ssabsv2hi2, "__builtin_bfin_abs_fr2x16", BFIN_BUILTIN_ABS_2X16, 0 }
5359 };
5360 
5361 /* Errors in the source file can cause expand_expr to return const0_rtx
5362    where we expect a vector.  To avoid crashing, generate a zero of the
5363    required vector mode and use it instead.  */
5364 static rtx
5365 safe_vector_operand (rtx x, machine_mode mode)
5366 {
5367   if (x != const0_rtx)
5368     return x;
5369   x = gen_reg_rtx (SImode);
5370 
5371   emit_insn (gen_movsi (x, CONST0_RTX (SImode)));
5372   return gen_lowpart (mode, x);
5373 }
5374 
5375 /* Subroutine of bfin_expand_builtin to take care of binop insns.  MACFLAG is -1
5376    if this is a normal binary op, or one of the MACFLAG_xxx constants.  */
5377 
5378 static rtx
5379 bfin_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
5380 			   int macflag)
5381 {
5382   rtx pat;
5383   tree arg0 = CALL_EXPR_ARG (exp, 0);
5384   tree arg1 = CALL_EXPR_ARG (exp, 1);
5385   rtx op0 = expand_normal (arg0);
5386   rtx op1 = expand_normal (arg1);
5387   machine_mode op0mode = GET_MODE (op0);
5388   machine_mode op1mode = GET_MODE (op1);
5389   machine_mode tmode = insn_data[icode].operand[0].mode;
5390   machine_mode mode0 = insn_data[icode].operand[1].mode;
5391   machine_mode mode1 = insn_data[icode].operand[2].mode;
5392 
5393   if (VECTOR_MODE_P (mode0))
5394     op0 = safe_vector_operand (op0, mode0);
5395   if (VECTOR_MODE_P (mode1))
5396     op1 = safe_vector_operand (op1, mode1);
5397 
5398   if (! target
5399       || GET_MODE (target) != tmode
5400       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
5401     target = gen_reg_rtx (tmode);
5402 
5403   if ((op0mode == SImode || op0mode == VOIDmode) && mode0 == HImode)
5404     {
5405       op0mode = HImode;
5406       op0 = gen_lowpart (HImode, op0);
5407     }
5408   if ((op1mode == SImode || op1mode == VOIDmode) && mode1 == HImode)
5409     {
5410       op1mode = HImode;
5411       op1 = gen_lowpart (HImode, op1);
5412     }
5413   /* Abort if the argument modes still fail to match the modes the
5414      insn expects for its input operands.  */
5415   gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
5416 	      && (op1mode == mode1 || op1mode == VOIDmode));
5417 
5418   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
5419     op0 = copy_to_mode_reg (mode0, op0);
5420   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
5421     op1 = copy_to_mode_reg (mode1, op1);
5422 
5423   if (macflag == -1)
5424     pat = GEN_FCN (icode) (target, op0, op1);
5425   else
5426     pat = GEN_FCN (icode) (target, op0, op1, GEN_INT (macflag));
5427   if (! pat)
5428     return 0;
5429 
5430   emit_insn (pat);
5431   return target;
5432 }
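/* For example, tracing a call through the routine just defined: a call
   to __builtin_bfin_mult_fr1x16 (a, b) arrives with ICODE =
   CODE_FOR_flag_mulhi and MACFLAG = MACFLAG_T (see bdesc_2arg above);
   the SImode arguments are narrowed to HImode via gen_lowpart, and the
   pattern is emitted as flag_mulhi (target, op0, op1, MACFLAG_T).  */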
5433 
5434 /* Subroutine of bfin_expand_builtin to take care of unop insns.  */
5435 
5436 static rtx
5437 bfin_expand_unop_builtin (enum insn_code icode, tree exp,
5438 			  rtx target)
5439 {
5440   rtx pat;
5441   tree arg0 = CALL_EXPR_ARG (exp, 0);
5442   rtx op0 = expand_normal (arg0);
5443   machine_mode op0mode = GET_MODE (op0);
5444   machine_mode tmode = insn_data[icode].operand[0].mode;
5445   machine_mode mode0 = insn_data[icode].operand[1].mode;
5446 
5447   if (! target
5448       || GET_MODE (target) != tmode
5449       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
5450     target = gen_reg_rtx (tmode);
5451 
5452   if (VECTOR_MODE_P (mode0))
5453     op0 = safe_vector_operand (op0, mode0);
5454 
5455   if (op0mode == SImode && mode0 == HImode)
5456     {
5457       op0mode = HImode;
5458       op0 = gen_lowpart (HImode, op0);
5459     }
5460   gcc_assert (op0mode == mode0 || op0mode == VOIDmode);
5461 
5462   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
5463     op0 = copy_to_mode_reg (mode0, op0);
5464 
5465   pat = GEN_FCN (icode) (target, op0);
5466   if (! pat)
5467     return 0;
5468   emit_insn (pat);
5469   return target;
5470 }
5471 
5472 /* Expand an expression EXP that calls a built-in function,
5473    with result going to TARGET if that's convenient
5474    (and in mode MODE if that's convenient).
5475    SUBTARGET may be used as the target for computing one of EXP's operands.
5476    IGNORE is nonzero if the value is to be ignored.  */
5477 
5478 static rtx
5479 bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
5480 		     rtx subtarget ATTRIBUTE_UNUSED,
5481 		     machine_mode mode ATTRIBUTE_UNUSED,
5482 		     int ignore ATTRIBUTE_UNUSED)
5483 {
5484   size_t i;
5485   enum insn_code icode;
5486   const struct builtin_description *d;
5487   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
5488   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
5489   tree arg0, arg1, arg2;
5490   rtx op0, op1, op2, accvec, pat, tmp1, tmp2, a0reg, a1reg;
5491   machine_mode tmode, mode0;
5492 
5493   switch (fcode)
5494     {
5495     case BFIN_BUILTIN_CSYNC:
5496       emit_insn (gen_csync ());
5497       return 0;
5498     case BFIN_BUILTIN_SSYNC:
5499       emit_insn (gen_ssync ());
5500       return 0;
5501 
5502     case BFIN_BUILTIN_DIFFHL_2X16:
5503     case BFIN_BUILTIN_DIFFLH_2X16:
5504     case BFIN_BUILTIN_SUM_2X16:
5505       arg0 = CALL_EXPR_ARG (exp, 0);
5506       op0 = expand_normal (arg0);
5507       icode = (fcode == BFIN_BUILTIN_DIFFHL_2X16 ? CODE_FOR_subhilov2hi3
5508 	       : fcode == BFIN_BUILTIN_DIFFLH_2X16 ? CODE_FOR_sublohiv2hi3
5509 	       : CODE_FOR_ssaddhilov2hi3);
5510       tmode = insn_data[icode].operand[0].mode;
5511       mode0 = insn_data[icode].operand[1].mode;
5512 
5513       if (! target
5514 	  || GET_MODE (target) != tmode
5515 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
5516 	target = gen_reg_rtx (tmode);
5517 
5518       if (VECTOR_MODE_P (mode0))
5519 	op0 = safe_vector_operand (op0, mode0);
5520 
5521       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
5522 	op0 = copy_to_mode_reg (mode0, op0);
5523 
5524       pat = GEN_FCN (icode) (target, op0, op0);
5525       if (! pat)
5526 	return 0;
5527       emit_insn (pat);
5528       return target;
5529 
5530     case BFIN_BUILTIN_MULT_1X32X32:
5531     case BFIN_BUILTIN_MULT_1X32X32NS:
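      /* Illustrative note: the 1.31 x 1.31 product below is assembled
	 schoolbook-fashion from 16x16 partial products in the A0/A1
	 accumulators: the low*low term is formed unsigned (MACFLAG_FU)
	 and shifted right by 16, the mixed high*low terms use
	 MACFLAG_M, and after the final shift correction the two
	 accumulators are summed into TARGET.  */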
5532       arg0 = CALL_EXPR_ARG (exp, 0);
5533       arg1 = CALL_EXPR_ARG (exp, 1);
5534       op0 = expand_normal (arg0);
5535       op1 = expand_normal (arg1);
5536       if (! target
5537 	  || !register_operand (target, SImode))
5538 	target = gen_reg_rtx (SImode);
5539       if (! register_operand (op0, SImode))
5540 	op0 = copy_to_mode_reg (SImode, op0);
5541       if (! register_operand (op1, SImode))
5542 	op1 = copy_to_mode_reg (SImode, op1);
5543 
5544       a1reg = gen_rtx_REG (PDImode, REG_A1);
5545       a0reg = gen_rtx_REG (PDImode, REG_A0);
5546       tmp1 = gen_lowpart (V2HImode, op0);
5547       tmp2 = gen_lowpart (V2HImode, op1);
5548       emit_insn (gen_flag_macinit1hi (a1reg,
5549 				      gen_lowpart (HImode, op0),
5550 				      gen_lowpart (HImode, op1),
5551 				      GEN_INT (MACFLAG_FU)));
5552       emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
5553 
5554       if (fcode == BFIN_BUILTIN_MULT_1X32X32)
5555 	emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, tmp1, tmp2,
5556 						       const1_rtx, const1_rtx,
5557 						       const1_rtx, const0_rtx, a1reg,
5558 						       const0_rtx, GEN_INT (MACFLAG_NONE),
5559 						       GEN_INT (MACFLAG_M)));
5560       else
5561 	{
5562 	  /* For saturating multiplication, there's exactly one special case
5563 	     to be handled: multiplying the smallest negative value by
5564 	     itself.  Due to shift correction in fractional multiplies, this
5565 	     can overflow.  Iff this happens, OP2 will contain 1; adding it
5566 	     in 32 bits to the smallest negative value wraps around to the
5567 	     largest positive value, which is the result we want.  */
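	  /* Concretely (illustrative): in the 1.31 format 0x80000000 is
	     -1.0 and the largest representable value is 0x7fffffff,
	     just below +1.0; (-1.0) * (-1.0) = +1.0 therefore cannot be
	     represented, and the saturated result must be 0x7fffffff.  */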
5568 	  op2 = gen_reg_rtx (V2HImode);
5569 	  emit_insn (gen_packv2hi (op2, tmp1, tmp2, const0_rtx, const0_rtx));
5570 	  emit_insn (gen_movsibi (gen_rtx_REG (BImode, REG_CC),
5571 				  gen_lowpart (SImode, op2)));
5572 	  emit_insn (gen_flag_mul_macv2hi_parts_acconly_andcc0 (a0reg, a1reg, tmp1, tmp2,
5573 								const1_rtx, const1_rtx,
5574 								const1_rtx, const0_rtx, a1reg,
5575 								const0_rtx, GEN_INT (MACFLAG_NONE),
5576 								GEN_INT (MACFLAG_M)));
5577 	  op2 = gen_reg_rtx (SImode);
5578 	  emit_insn (gen_movbisi (op2, gen_rtx_REG (BImode, REG_CC)));
5579 	}
5580       emit_insn (gen_flag_machi_parts_acconly (a1reg, tmp2, tmp1,
5581 					       const1_rtx, const0_rtx,
5582 					       a1reg, const0_rtx, GEN_INT (MACFLAG_M)));
5583       emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (15)));
5584       emit_insn (gen_sum_of_accumulators (target, a0reg, a0reg, a1reg));
5585       if (fcode == BFIN_BUILTIN_MULT_1X32X32NS)
5586 	emit_insn (gen_addsi3 (target, target, op2));
5587       return target;
5588 
5589     case BFIN_BUILTIN_CPLX_MUL_16:
5590     case BFIN_BUILTIN_CPLX_MUL_16_S40:
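      /* Illustrative note: this expands the 16-bit complex multiply:
	 with each operand packed as a pair of real/imaginary fract16
	 halves, the result is {a.re*b.re - a.im*b.im,
	 a.re*b.im + a.im*b.re}.  The plain variant saturates each
	 partial product to 32 bits (MACFLAG_W32); the _S40 variant
	 keeps full 40-bit accumulator precision (MACFLAG_NONE).  */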
5591       arg0 = CALL_EXPR_ARG (exp, 0);
5592       arg1 = CALL_EXPR_ARG (exp, 1);
5593       op0 = expand_normal (arg0);
5594       op1 = expand_normal (arg1);
5595       accvec = gen_reg_rtx (V2PDImode);
5596       icode = CODE_FOR_flag_macv2hi_parts;
5597       tmode = insn_data[icode].operand[0].mode;
5598 
5599       if (! target
5600 	  || GET_MODE (target) != V2HImode
5601 	  || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode))
5602 	target = gen_reg_rtx (tmode);
5603       if (! register_operand (op0, GET_MODE (op0)))
5604 	op0 = copy_to_mode_reg (GET_MODE (op0), op0);
5605       if (! register_operand (op1, GET_MODE (op1)))
5606 	op1 = copy_to_mode_reg (GET_MODE (op1), op1);
5607 
5608       if (fcode == BFIN_BUILTIN_CPLX_MUL_16)
5609 	emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
5610 						const0_rtx, const0_rtx,
5611 						const1_rtx, GEN_INT (MACFLAG_W32)));
5612       else
5613 	emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
5614 						const0_rtx, const0_rtx,
5615 						const1_rtx, GEN_INT (MACFLAG_NONE)));
5616       emit_insn (gen_flag_macv2hi_parts (target, op0, op1, const1_rtx,
5617 					 const1_rtx, const1_rtx,
5618 					 const0_rtx, accvec, const1_rtx, const0_rtx,
5619 					 GEN_INT (MACFLAG_NONE), accvec));
5620 
5621       return target;
5622 
5623     case BFIN_BUILTIN_CPLX_MAC_16:
5624     case BFIN_BUILTIN_CPLX_MSU_16:
5625     case BFIN_BUILTIN_CPLX_MAC_16_S40:
5626     case BFIN_BUILTIN_CPLX_MSU_16_S40:
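      /* Illustrative note: the multiply-accumulate variants first load
	 the accumulator pair from OP0 (the running sum), then form the
	 complex product of OP1 and OP2 on top of it; the TMP1/TMP2
	 constants chosen below select between accumulating (MAC) and
	 subtracting (MSU) the product.  */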
5627       arg0 = CALL_EXPR_ARG (exp, 0);
5628       arg1 = CALL_EXPR_ARG (exp, 1);
5629       arg2 = CALL_EXPR_ARG (exp, 2);
5630       op0 = expand_normal (arg0);
5631       op1 = expand_normal (arg1);
5632       op2 = expand_normal (arg2);
5633       accvec = gen_reg_rtx (V2PDImode);
5634       icode = CODE_FOR_flag_macv2hi_parts;
5635       tmode = insn_data[icode].operand[0].mode;
5636 
5637       if (! target
5638 	  || GET_MODE (target) != V2HImode
5639 	  || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode))
5640 	target = gen_reg_rtx (tmode);
5641       if (! register_operand (op1, GET_MODE (op1)))
5642 	op1 = copy_to_mode_reg (GET_MODE (op1), op1);
5643       if (! register_operand (op2, GET_MODE (op2)))
5644 	op2 = copy_to_mode_reg (GET_MODE (op2), op2);
5645 
5646       tmp1 = gen_reg_rtx (SImode);
5647       tmp2 = gen_reg_rtx (SImode);
5648       emit_insn (gen_ashlsi3 (tmp1, gen_lowpart (SImode, op0), GEN_INT (16)));
5649       emit_move_insn (tmp2, gen_lowpart (SImode, op0));
5650       emit_insn (gen_movstricthi_1 (gen_lowpart (HImode, tmp2), const0_rtx));
5651       emit_insn (gen_load_accumulator_pair (accvec, tmp1, tmp2));
5652       if (fcode == BFIN_BUILTIN_CPLX_MAC_16
5653 	  || fcode == BFIN_BUILTIN_CPLX_MSU_16)
5654 	emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
5655 						   const0_rtx, const0_rtx,
5656 						   const1_rtx, accvec, const0_rtx,
5657 						   const0_rtx,
5658 						   GEN_INT (MACFLAG_W32)));
5659       else
5660 	emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
5661 						   const0_rtx, const0_rtx,
5662 						   const1_rtx, accvec, const0_rtx,
5663 						   const0_rtx,
5664 						   GEN_INT (MACFLAG_NONE)));
5665       if (fcode == BFIN_BUILTIN_CPLX_MAC_16
5666 	  || fcode == BFIN_BUILTIN_CPLX_MAC_16_S40)
5667 	{
5668 	  tmp1 = const1_rtx;
5669 	  tmp2 = const0_rtx;
5670 	}
5671       else
5672 	{
5673 	  tmp1 = const0_rtx;
5674 	  tmp2 = const1_rtx;
5675 	}
5676       emit_insn (gen_flag_macv2hi_parts (target, op1, op2, const1_rtx,
5677 					 const1_rtx, const1_rtx,
5678 					 const0_rtx, accvec, tmp1, tmp2,
5679 					 GEN_INT (MACFLAG_NONE), accvec));
5680 
5681       return target;
5682 
5683     case BFIN_BUILTIN_CPLX_SQU:
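      /* Illustrative note: squaring the complex fract16 pair {re, im}
	 yields {re*re - im*im, 2*re*im}; the code below forms the
	 elementwise squares in TMP1, the re*im cross term in TMP2, and
	 combines them with the saturating halfword add/sub patterns.  */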
5684       arg0 = CALL_EXPR_ARG (exp, 0);
5685       op0 = expand_normal (arg0);
5686       accvec = gen_reg_rtx (V2PDImode);
5687       icode = CODE_FOR_flag_mulv2hi;
5688       tmp1 = gen_reg_rtx (V2HImode);
5689       tmp2 = gen_reg_rtx (V2HImode);
5690 
5691       if (! target
5692 	  || GET_MODE (target) != V2HImode
5693 	  || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode))
5694 	target = gen_reg_rtx (V2HImode);
5695       if (! register_operand (op0, GET_MODE (op0)))
5696 	op0 = copy_to_mode_reg (GET_MODE (op0), op0);
5697 
5698       emit_insn (gen_flag_mulv2hi (tmp1, op0, op0, GEN_INT (MACFLAG_NONE)));
5699 
5700       emit_insn (gen_flag_mulhi_parts (gen_lowpart (HImode, tmp2), op0, op0,
5701 				       const0_rtx, const1_rtx,
5702 				       GEN_INT (MACFLAG_NONE)));
5703 
5704       emit_insn (gen_ssaddhi3_high_parts (target, tmp2, tmp2, tmp2, const0_rtx,
5705 					  const0_rtx));
5706       emit_insn (gen_sssubhi3_low_parts (target, target, tmp1, tmp1,
5707 					 const0_rtx, const1_rtx));
5708 
5709       return target;
5710 
5711     default:
5712       break;
5713     }
5714 
5715   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
5716     if (d->code == fcode)
5717       return bfin_expand_binop_builtin (d->icode, exp, target,
5718 					d->macflag);
5719 
5720   for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
5721     if (d->code == fcode)
5722       return bfin_expand_unop_builtin (d->icode, exp, target);
5723 
5724   gcc_unreachable ();
5725 }
5726 
5727 static void
5728 bfin_conditional_register_usage (void)
5729 {
5730   /* Initialize the RTL for the condition code flag and RETS registers.  */
5731   bfin_cc_rtx = gen_rtx_REG (BImode, REG_CC);
5732   bfin_rets_rtx = gen_rtx_REG (Pmode, REG_RETS);
5733   if (TARGET_FDPIC)
5734     call_used_regs[FDPIC_REGNO] = 1;
5735   if (!TARGET_FDPIC && flag_pic)
5736     {
5737       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
5738       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
5739     }
5740 }
5741 
5742 #undef TARGET_INIT_BUILTINS
5743 #define TARGET_INIT_BUILTINS bfin_init_builtins
5744 
5745 #undef TARGET_EXPAND_BUILTIN
5746 #define TARGET_EXPAND_BUILTIN bfin_expand_builtin
5747 
5748 #undef TARGET_ASM_GLOBALIZE_LABEL
5749 #define TARGET_ASM_GLOBALIZE_LABEL bfin_globalize_label
5750 
5751 #undef TARGET_ASM_FILE_START
5752 #define TARGET_ASM_FILE_START output_file_start
5753 
5754 #undef TARGET_ATTRIBUTE_TABLE
5755 #define TARGET_ATTRIBUTE_TABLE bfin_attribute_table
5756 
5757 #undef TARGET_COMP_TYPE_ATTRIBUTES
5758 #define TARGET_COMP_TYPE_ATTRIBUTES bfin_comp_type_attributes
5759 
5760 #undef TARGET_RTX_COSTS
5761 #define TARGET_RTX_COSTS bfin_rtx_costs
5762 
5763 #undef  TARGET_ADDRESS_COST
5764 #define TARGET_ADDRESS_COST bfin_address_cost
5765 
5766 #undef TARGET_REGISTER_MOVE_COST
5767 #define TARGET_REGISTER_MOVE_COST bfin_register_move_cost
5768 
5769 #undef TARGET_MEMORY_MOVE_COST
5770 #define TARGET_MEMORY_MOVE_COST bfin_memory_move_cost
5771 
5772 #undef  TARGET_ASM_INTEGER
5773 #define TARGET_ASM_INTEGER bfin_assemble_integer
5774 
5775 #undef TARGET_MACHINE_DEPENDENT_REORG
5776 #define TARGET_MACHINE_DEPENDENT_REORG bfin_reorg
5777 
5778 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
5779 #define TARGET_FUNCTION_OK_FOR_SIBCALL bfin_function_ok_for_sibcall
5780 
5781 #undef TARGET_ASM_OUTPUT_MI_THUNK
5782 #define TARGET_ASM_OUTPUT_MI_THUNK bfin_output_mi_thunk
5783 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
5784 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
5785 
5786 #undef TARGET_SCHED_ADJUST_COST
5787 #define TARGET_SCHED_ADJUST_COST bfin_adjust_cost
5788 
5789 #undef TARGET_SCHED_ISSUE_RATE
5790 #define TARGET_SCHED_ISSUE_RATE bfin_issue_rate
5791 
5792 #undef TARGET_PROMOTE_FUNCTION_MODE
5793 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
5794 
5795 #undef TARGET_ARG_PARTIAL_BYTES
5796 #define TARGET_ARG_PARTIAL_BYTES bfin_arg_partial_bytes
5797 
5798 #undef TARGET_FUNCTION_ARG
5799 #define TARGET_FUNCTION_ARG bfin_function_arg
5800 
5801 #undef TARGET_FUNCTION_ARG_ADVANCE
5802 #define TARGET_FUNCTION_ARG_ADVANCE bfin_function_arg_advance
5803 
5804 #undef TARGET_PASS_BY_REFERENCE
5805 #define TARGET_PASS_BY_REFERENCE bfin_pass_by_reference
5806 
5807 #undef TARGET_SETUP_INCOMING_VARARGS
5808 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
5809 
5810 #undef TARGET_STRUCT_VALUE_RTX
5811 #define TARGET_STRUCT_VALUE_RTX bfin_struct_value_rtx
5812 
5813 #undef TARGET_VECTOR_MODE_SUPPORTED_P
5814 #define TARGET_VECTOR_MODE_SUPPORTED_P bfin_vector_mode_supported_p
5815 
5816 #undef TARGET_OPTION_OVERRIDE
5817 #define TARGET_OPTION_OVERRIDE bfin_option_override
5818 
5819 #undef TARGET_SECONDARY_RELOAD
5820 #define TARGET_SECONDARY_RELOAD bfin_secondary_reload
5821 
5822 #undef TARGET_CLASS_LIKELY_SPILLED_P
5823 #define TARGET_CLASS_LIKELY_SPILLED_P bfin_class_likely_spilled_p
5824 
5825 #undef TARGET_DELEGITIMIZE_ADDRESS
5826 #define TARGET_DELEGITIMIZE_ADDRESS bfin_delegitimize_address
5827 
5828 #undef TARGET_LEGITIMATE_CONSTANT_P
5829 #define TARGET_LEGITIMATE_CONSTANT_P bfin_legitimate_constant_p
5830 
5831 #undef TARGET_CANNOT_FORCE_CONST_MEM
5832 #define TARGET_CANNOT_FORCE_CONST_MEM bfin_cannot_force_const_mem
5833 
5834 #undef TARGET_RETURN_IN_MEMORY
5835 #define TARGET_RETURN_IN_MEMORY bfin_return_in_memory
5836 
5837 #undef TARGET_LEGITIMATE_ADDRESS_P
5838 #define TARGET_LEGITIMATE_ADDRESS_P	bfin_legitimate_address_p
5839 
5840 #undef TARGET_FRAME_POINTER_REQUIRED
5841 #define TARGET_FRAME_POINTER_REQUIRED bfin_frame_pointer_required
5842 
5843 #undef TARGET_CAN_ELIMINATE
5844 #define TARGET_CAN_ELIMINATE bfin_can_eliminate
5845 
5846 #undef TARGET_CONDITIONAL_REGISTER_USAGE
5847 #define TARGET_CONDITIONAL_REGISTER_USAGE bfin_conditional_register_usage
5848 
5849 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
5850 #define TARGET_ASM_TRAMPOLINE_TEMPLATE bfin_asm_trampoline_template
5851 #undef TARGET_TRAMPOLINE_INIT
5852 #define TARGET_TRAMPOLINE_INIT bfin_trampoline_init
5853 
5854 #undef TARGET_EXTRA_LIVE_ON_ENTRY
5855 #define TARGET_EXTRA_LIVE_ON_ENTRY bfin_extra_live_on_entry
5856 
5857 /* Passes after sched2 can break the helpful TImode annotations that
5858    haifa-sched puts on every insn.  Just do scheduling in reorg.  */
5859 #undef TARGET_DELAY_SCHED2
5860 #define TARGET_DELAY_SCHED2 true
5861 
5862 /* Variable tracking should be run after all optimizations that
5863    change the order of insns.  It also needs a valid CFG.  */
5864 #undef TARGET_DELAY_VARTRACK
5865 #define TARGET_DELAY_VARTRACK true
5866 
5867 #undef TARGET_CAN_USE_DOLOOP_P
5868 #define TARGET_CAN_USE_DOLOOP_P bfin_can_use_doloop_p
5869 
5870 struct gcc_target targetm = TARGET_INITIALIZER;
5871