/* Subroutines used for code generation on the Synopsys DesignWare ARC cpu.
   Copyright (C) 1994-2015 Free Software Foundation, Inc.

   Sources derived from work done by Sankhya Technologies (www.sankhya.com) on
   behalf of Synopsys Inc.

   Position Independent Code support added, code cleaned up,
   comments and support for ARC700 instructions added by
   Saurabh Verma (saurabh.verma@codito.com)
   Ramana Radhakrishnan (ramana.radhakrishnan@codito.com)

   Fixing ABI inconsistencies, optimizations for ARC600 / ARC700 pipelines,
   profiling support added by Joern Rennecke <joern.rennecke@embecosm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "varasm.h"
#include "stor-layout.h"
#include "stringpool.h"
#include "calls.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "hashtab.h"
#include "function.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "statistics.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "recog.h"
#include "debug.h"
#include "diagnostic.h"
#include "insn-codes.h"
#include "langhooks.h"
#include "optabs.h"
#include "tm-constrs.h"
#include "reload.h" /* For operands_match_p */
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "predict.h"
#include "basic-block.h"
#include "df.h"
#include "tree-pass.h"
#include "context.h"
#include "pass_manager.h"
#include "builtins.h"
#include "rtl-iter.h"

/* Which cpu we're compiling for (A5, ARC600, ARC601, ARC700).  */
static const char *arc_cpu_string = "";

/* ??? Loads can handle any constant, stores can only handle small ones.  */
/* OTOH, LIMMs cost extra, so their usefulness is limited.  */
#define RTX_OK_FOR_OFFSET_P(MODE, X) \
(GET_CODE (X) == CONST_INT \
 && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & -4, \
		     (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3 \
		      ? 0 \
		      : -(-GET_MODE_SIZE (MODE) | -4) >> 1)))
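
/* An informal reading of the macro above (a sketch, not normative):
   the second SMALL_INT_RANGE argument is nonzero only for accesses
   wider than a word, compensating for the extra bytes such an access
   touches, and the third argument collapses to 0 when the offset is
   misaligned with respect to the access size, tightening the range
   such offsets may occupy.  */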

#define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X, INDEX, STRICT) \
(GET_CODE (X) == PLUS			     \
  && RTX_OK_FOR_BASE_P (XEXP (X, 0), (STRICT)) \
  && ((INDEX && RTX_OK_FOR_INDEX_P (XEXP (X, 1), (STRICT)) \
       && GET_MODE_SIZE ((MODE)) <= 4) \
      || RTX_OK_FOR_OFFSET_P (MODE, XEXP (X, 1))))

#define LEGITIMATE_SCALED_ADDRESS_P(MODE, X, STRICT) \
(GET_CODE (X) == PLUS \
 && GET_CODE (XEXP (X, 0)) == MULT \
 && RTX_OK_FOR_INDEX_P (XEXP (XEXP (X, 0), 0), (STRICT)) \
 && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \
 && ((GET_MODE_SIZE (MODE) == 2 && INTVAL (XEXP (XEXP (X, 0), 1)) == 2) \
     || (GET_MODE_SIZE (MODE) == 4 && INTVAL (XEXP (XEXP (X, 0), 1)) == 4)) \
 && (RTX_OK_FOR_BASE_P (XEXP (X, 1), (STRICT)) \
     || (flag_pic ? CONST_INT_P (XEXP (X, 1)) : CONSTANT_P (XEXP (X, 1)))))

#define LEGITIMATE_SMALL_DATA_ADDRESS_P(X) \
  (GET_CODE (X) == PLUS \
   && (REG_P (XEXP ((X), 0)) && REGNO (XEXP ((X), 0)) == SDATA_BASE_REGNUM) \
   && ((GET_CODE (XEXP ((X), 1)) == SYMBOL_REF \
	&& SYMBOL_REF_SMALL_P (XEXP ((X), 1))) \
       || (GET_CODE (XEXP ((X), 1)) == CONST \
	   && GET_CODE (XEXP (XEXP ((X), 1), 0)) == PLUS \
	   && GET_CODE (XEXP (XEXP (XEXP ((X), 1), 0), 0)) == SYMBOL_REF \
	   && SYMBOL_REF_SMALL_P (XEXP (XEXP (XEXP ((X), 1), 0), 0)) \
	   && GET_CODE (XEXP (XEXP (XEXP ((X), 1), 0), 1)) == CONST_INT)))

/* Array of valid operand punctuation characters.  */
char arc_punct_chars[256];

/* State used by arc_ccfsm_advance to implement conditional execution.  */
struct GTY (()) arc_ccfsm
{
  int state;
  int cc;
  rtx cond;
  rtx_insn *target_insn;
  int target_label;
};
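
/* Informal note on the STATE encoding, as implied by the macros below:
   values 1 and 2 mean a branch has been deleted and the insns it would
   have skipped are to be conditionalized; values 3-5 mean we are in the
   middle of emitting such conditionalized (predicated) insns.  */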

#define arc_ccfsm_current cfun->machine->ccfsm_current

#define ARC_CCFSM_BRANCH_DELETED_P(STATE) \
  ((STATE)->state == 1 || (STATE)->state == 2)

/* Indicate we're conditionalizing insns now.  */
#define ARC_CCFSM_RECORD_BRANCH_DELETED(STATE) \
  ((STATE)->state += 2)

#define ARC_CCFSM_COND_EXEC_P(STATE) \
  ((STATE)->state == 3 || (STATE)->state == 4 || (STATE)->state == 5 \
   || current_insn_predicate)

/* Check if INSN has a 16 bit opcode considering struct arc_ccfsm *STATE.  */
#define CCFSM_ISCOMPACT(INSN,STATE) \
  (ARC_CCFSM_COND_EXEC_P (STATE) \
   ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
      || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
   : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)

/* Likewise, but also consider that INSN might be in a delay slot of JUMP.  */
#define CCFSM_DBR_ISCOMPACT(INSN,JUMP,STATE) \
  ((ARC_CCFSM_COND_EXEC_P (STATE) \
    || (JUMP_P (JUMP) \
	&& INSN_ANNULLED_BRANCH_P (JUMP) \
	&& (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (INSN)))) \
   ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
      || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
   : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)

/* The maximum number of insns skipped which will be conditionalised if
   possible.  */
/* When optimizing for speed:
    Let p be the probability that the potentially skipped insns need to
    be executed, pn the cost of a correctly predicted non-taken branch,
    mt the cost of a mis/non-predicted taken branch,
    mn mispredicted non-taken, pt correctly predicted taken;
    costs expressed in numbers of instructions like the ones considered
    skipping.
    Unfortunately we don't have a measure of predictability - this
    is linked to probability only in that in the no-eviction scenario
    there is a lower bound 1 - 2 * min (p, 1-p), and a somewhat larger
    value that can be assumed *if* the distribution is perfectly random.
    A predictability of 1 is perfectly plausible no matter what p is,
    because the decision could be dependent on an invocation parameter
    of the program.
    For large p, we want MAX_INSNS_SKIPPED == pn/(1-p) + mt - pn
    For small p, we want MAX_INSNS_SKIPPED == pt

   When optimizing for size:
    We want to skip insns unless we could use 16 opcodes for the
    non-conditionalized insn to balance the branch length or more.
    Performance can be tie-breaker.  */
/* If the potentially-skipped insns are likely to be executed, we'll
   generally save one non-taken branch; we want this saving to be
   no less than 1/p.  */
#define MAX_INSNS_SKIPPED 3

/* The values of unspec's first field.  */
enum {
  ARC_UNSPEC_PLT = 3,
  ARC_UNSPEC_GOT,
  ARC_UNSPEC_GOTOFF
};


enum arc_builtins {
  ARC_BUILTIN_NOP        =    2,
  ARC_BUILTIN_NORM       =    3,
  ARC_BUILTIN_NORMW      =    4,
  ARC_BUILTIN_SWAP       =    5,
  ARC_BUILTIN_BRK        =    6,
  ARC_BUILTIN_DIVAW      =    7,
  ARC_BUILTIN_EX         =    8,
  ARC_BUILTIN_MUL64      =    9,
  ARC_BUILTIN_MULU64     =   10,
  ARC_BUILTIN_RTIE       =   11,
  ARC_BUILTIN_SYNC       =   12,
  ARC_BUILTIN_CORE_READ  =   13,
  ARC_BUILTIN_CORE_WRITE =   14,
  ARC_BUILTIN_FLAG       =   15,
  ARC_BUILTIN_LR         =   16,
  ARC_BUILTIN_SR         =   17,
  ARC_BUILTIN_SLEEP      =   18,
  ARC_BUILTIN_SWI        =   19,
  ARC_BUILTIN_TRAP_S     =   20,
  ARC_BUILTIN_UNIMP_S    =   21,
  ARC_BUILTIN_ALIGNED    =   22,

  /* Sentinel to mark start of simd builtins.  */
  ARC_SIMD_BUILTIN_BEGIN      = 1000,

  ARC_SIMD_BUILTIN_VADDAW     = 1001,
  ARC_SIMD_BUILTIN_VADDW      = 1002,
  ARC_SIMD_BUILTIN_VAVB       = 1003,
  ARC_SIMD_BUILTIN_VAVRB      = 1004,
  ARC_SIMD_BUILTIN_VDIFAW     = 1005,
  ARC_SIMD_BUILTIN_VDIFW      = 1006,
  ARC_SIMD_BUILTIN_VMAXAW     = 1007,
  ARC_SIMD_BUILTIN_VMAXW      = 1008,
  ARC_SIMD_BUILTIN_VMINAW     = 1009,
  ARC_SIMD_BUILTIN_VMINW      = 1010,
  ARC_SIMD_BUILTIN_VMULAW     = 1011,
  ARC_SIMD_BUILTIN_VMULFAW    = 1012,
  ARC_SIMD_BUILTIN_VMULFW     = 1013,
  ARC_SIMD_BUILTIN_VMULW      = 1014,
  ARC_SIMD_BUILTIN_VSUBAW     = 1015,
  ARC_SIMD_BUILTIN_VSUBW      = 1016,
  ARC_SIMD_BUILTIN_VSUMMW     = 1017,
  ARC_SIMD_BUILTIN_VAND       = 1018,
  ARC_SIMD_BUILTIN_VANDAW     = 1019,
  ARC_SIMD_BUILTIN_VBIC       = 1020,
  ARC_SIMD_BUILTIN_VBICAW     = 1021,
  ARC_SIMD_BUILTIN_VOR        = 1022,
  ARC_SIMD_BUILTIN_VXOR       = 1023,
  ARC_SIMD_BUILTIN_VXORAW     = 1024,
  ARC_SIMD_BUILTIN_VEQW       = 1025,
  ARC_SIMD_BUILTIN_VLEW       = 1026,
  ARC_SIMD_BUILTIN_VLTW       = 1027,
  ARC_SIMD_BUILTIN_VNEW       = 1028,
  ARC_SIMD_BUILTIN_VMR1AW     = 1029,
  ARC_SIMD_BUILTIN_VMR1W      = 1030,
  ARC_SIMD_BUILTIN_VMR2AW     = 1031,
  ARC_SIMD_BUILTIN_VMR2W      = 1032,
  ARC_SIMD_BUILTIN_VMR3AW     = 1033,
  ARC_SIMD_BUILTIN_VMR3W      = 1034,
  ARC_SIMD_BUILTIN_VMR4AW     = 1035,
  ARC_SIMD_BUILTIN_VMR4W      = 1036,
  ARC_SIMD_BUILTIN_VMR5AW     = 1037,
  ARC_SIMD_BUILTIN_VMR5W      = 1038,
  ARC_SIMD_BUILTIN_VMR6AW     = 1039,
  ARC_SIMD_BUILTIN_VMR6W      = 1040,
  ARC_SIMD_BUILTIN_VMR7AW     = 1041,
  ARC_SIMD_BUILTIN_VMR7W      = 1042,
  ARC_SIMD_BUILTIN_VMRB       = 1043,
  ARC_SIMD_BUILTIN_VH264F     = 1044,
  ARC_SIMD_BUILTIN_VH264FT    = 1045,
  ARC_SIMD_BUILTIN_VH264FW    = 1046,
  ARC_SIMD_BUILTIN_VVC1F      = 1047,
  ARC_SIMD_BUILTIN_VVC1FT     = 1048,

  /* Va, Vb, rlimm instructions.  */
  ARC_SIMD_BUILTIN_VBADDW     = 1050,
  ARC_SIMD_BUILTIN_VBMAXW     = 1051,
  ARC_SIMD_BUILTIN_VBMINW     = 1052,
  ARC_SIMD_BUILTIN_VBMULAW    = 1053,
  ARC_SIMD_BUILTIN_VBMULFW    = 1054,
  ARC_SIMD_BUILTIN_VBMULW     = 1055,
  ARC_SIMD_BUILTIN_VBRSUBW    = 1056,
  ARC_SIMD_BUILTIN_VBSUBW     = 1057,

  /* Va, Vb, Ic instructions.  */
  ARC_SIMD_BUILTIN_VASRW      = 1060,
  ARC_SIMD_BUILTIN_VSR8       = 1061,
  ARC_SIMD_BUILTIN_VSR8AW     = 1062,

  /* Va, Vb, u6 instructions.  */
  ARC_SIMD_BUILTIN_VASRRWi    = 1065,
  ARC_SIMD_BUILTIN_VASRSRWi   = 1066,
  ARC_SIMD_BUILTIN_VASRWi     = 1067,
  ARC_SIMD_BUILTIN_VASRPWBi   = 1068,
  ARC_SIMD_BUILTIN_VASRRPWBi  = 1069,
  ARC_SIMD_BUILTIN_VSR8AWi    = 1070,
  ARC_SIMD_BUILTIN_VSR8i      = 1071,

  /* Va, Vb, u8 (simm) instructions.  */
  ARC_SIMD_BUILTIN_VMVAW      = 1075,
  ARC_SIMD_BUILTIN_VMVW       = 1076,
  ARC_SIMD_BUILTIN_VMVZW      = 1077,
  ARC_SIMD_BUILTIN_VD6TAPF    = 1078,

  /* Va, rlimm, u8 (simm) instructions.  */
  ARC_SIMD_BUILTIN_VMOVAW     = 1080,
  ARC_SIMD_BUILTIN_VMOVW      = 1081,
  ARC_SIMD_BUILTIN_VMOVZW     = 1082,

  /* Va, Vb instructions.  */
  ARC_SIMD_BUILTIN_VABSAW     = 1085,
  ARC_SIMD_BUILTIN_VABSW      = 1086,
  ARC_SIMD_BUILTIN_VADDSUW    = 1087,
  ARC_SIMD_BUILTIN_VSIGNW     = 1088,
  ARC_SIMD_BUILTIN_VEXCH1     = 1089,
  ARC_SIMD_BUILTIN_VEXCH2     = 1090,
  ARC_SIMD_BUILTIN_VEXCH4     = 1091,
  ARC_SIMD_BUILTIN_VUPBAW     = 1092,
  ARC_SIMD_BUILTIN_VUPBW      = 1093,
  ARC_SIMD_BUILTIN_VUPSBAW    = 1094,
  ARC_SIMD_BUILTIN_VUPSBW     = 1095,

  ARC_SIMD_BUILTIN_VDIRUN     = 1100,
  ARC_SIMD_BUILTIN_VDORUN     = 1101,
  ARC_SIMD_BUILTIN_VDIWR      = 1102,
  ARC_SIMD_BUILTIN_VDOWR      = 1103,

  ARC_SIMD_BUILTIN_VREC       = 1105,
  ARC_SIMD_BUILTIN_VRUN       = 1106,
  ARC_SIMD_BUILTIN_VRECRUN    = 1107,
  ARC_SIMD_BUILTIN_VENDREC    = 1108,

  ARC_SIMD_BUILTIN_VLD32WH    = 1110,
  ARC_SIMD_BUILTIN_VLD32WL    = 1111,
  ARC_SIMD_BUILTIN_VLD64      = 1112,
  ARC_SIMD_BUILTIN_VLD32      = 1113,
  ARC_SIMD_BUILTIN_VLD64W     = 1114,
  ARC_SIMD_BUILTIN_VLD128     = 1115,
  ARC_SIMD_BUILTIN_VST128     = 1116,
  ARC_SIMD_BUILTIN_VST64      = 1117,

  ARC_SIMD_BUILTIN_VST16_N    = 1120,
  ARC_SIMD_BUILTIN_VST32_N    = 1121,

  ARC_SIMD_BUILTIN_VINTI      = 1201,

  ARC_SIMD_BUILTIN_END
};

/* A nop is needed between a 4 byte insn that sets the condition codes and
   a branch that uses them (the same isn't true for an 8 byte insn that sets
   the condition codes).  Set by arc_ccfsm_advance.  Used by
   arc_print_operand.  */

static int get_arc_condition_code (rtx);

static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);

/* Table of machine-specific attributes supported by the ARC back end.  */
const struct attribute_spec arc_attribute_table[] =
{
 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
      affects_type_identity } */
  { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute, true },
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 21/25 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 25 bit
     addressing range of unconditionalized bl.  */
  { "medium_call",   0, 0, false, true,  true,  NULL, false },
  /* And these functions are always known to reside within the 21 bit
     addressing range of blcc.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  { NULL, 0, 0, false, false, false, NULL, false }
};
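
/* Example use of the call-range attributes (illustrative only; names
   are hypothetical):

     void far_handler (void) __attribute__ ((long_call));
     void near_helper (void) __attribute__ ((short_call));  */
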
static int arc_comp_type_attributes (const_tree, const_tree);
static void arc_file_start (void);
static void arc_internal_label (FILE *, const char *, unsigned long);
static void arc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static int arc_address_cost (rtx, machine_mode, addr_space_t, bool);
static void arc_encode_section_info (tree decl, rtx rtl, int first);

static void arc_init_builtins (void);
static rtx arc_expand_builtin (tree, rtx, rtx, machine_mode, int);

static int branch_dest (rtx);

static void arc_output_pic_addr_const (FILE *, rtx, int);
void emit_pic_move (rtx *, machine_mode);
bool arc_legitimate_pic_operand_p (rtx);
static bool arc_function_ok_for_sibcall (tree, tree);
static rtx arc_function_value (const_tree, const_tree, bool);
const char * output_shift (rtx *);
static void arc_reorg (void);
static bool arc_in_small_data_p (const_tree);

static void arc_init_reg_tables (void);
static bool arc_return_in_memory (const_tree, const_tree);
static void arc_init_simd_builtins (void);
static bool arc_vector_mode_supported_p (machine_mode);

static bool arc_can_use_doloop_p (const widest_int &, const widest_int &,
				  unsigned int, bool);
static const char *arc_invalid_within_doloop (const rtx_insn *);

static void output_short_suffix (FILE *file);

static bool arc_frame_pointer_required (void);

static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
						unsigned int,
						enum by_pieces_operation op,
						bool);

/* Implements target hook vector_mode_supported_p.  */

static bool
arc_vector_mode_supported_p (machine_mode mode)
{
  if (!TARGET_SIMD_SET)
    return false;

  if ((mode == V4SImode)
      || (mode == V8HImode))
    return true;

  return false;
}


/* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review.  */
static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
static rtx arc_delegitimize_address (rtx);
static bool arc_can_follow_jump (const rtx_insn *follower,
				 const rtx_insn *followee);

static rtx frame_insn (rtx);
static void arc_function_arg_advance (cumulative_args_t, machine_mode,
				      const_tree, bool);
static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);

static void arc_finalize_pic (void);

/* Initialize the GCC target structure.  */
#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arc_file_start
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arc_attribute_table
#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arc_internal_label
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arc_address_cost

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO arc_encode_section_info

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arc_cannot_force_const_mem

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS  arc_init_builtins

#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arc_expand_builtin

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arc_output_mi_thunk

#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arc_function_ok_for_sibcall

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arc_reorg

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P arc_in_small_data_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE \
  default_promote_function_mode_always_promote

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arc_return_in_memory
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arc_pass_by_reference

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs

#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arc_arg_partial_bytes

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arc_function_value

#undef  TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP arc_invalid_within_doloop

#undef TARGET_PRESERVE_RELOAD_P
#define TARGET_PRESERVE_RELOAD_P arc_preserve_reload_p

#undef TARGET_CAN_FOLLOW_JUMP
#define TARGET_CAN_FOLLOW_JUMP arc_can_follow_jump

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  arc_use_by_pieces_infrastructure_p

/* Usually, we will be able to scale anchor offsets.
   When this fails, we want LEGITIMIZE_ADDRESS to kick in.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET (-1024)
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET (1020)
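
/* A plausible reading of the bounds above (not checked against the
   programmer's reference manual): a scaled s9 offset covers -256..255
   words, i.e. -1024..1020 bytes at word scale, which would explain the
   asymmetric limits.  */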

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD arc_secondary_reload

#define TARGET_OPTION_OVERRIDE arc_override_options

#define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage

#define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline

#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arc_trampoline_adjust_address

#define TARGET_CAN_ELIMINATE arc_can_eliminate

#define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required

#define TARGET_FUNCTION_ARG arc_function_arg

#define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance

#define TARGET_LEGITIMATE_CONSTANT_P arc_legitimate_constant_p

#define TARGET_LEGITIMATE_ADDRESS_P arc_legitimate_address_p

#define TARGET_MODE_DEPENDENT_ADDRESS_P arc_mode_dependent_address_p

#define TARGET_LEGITIMIZE_ADDRESS arc_legitimize_address

#define TARGET_ADJUST_INSN_LENGTH arc_adjust_insn_length

#define TARGET_INSN_LENGTH_PARAMETERS arc_insn_length_parameters

#undef TARGET_LRA_P
#define TARGET_LRA_P arc_lra_p
#define TARGET_REGISTER_PRIORITY arc_register_priority
/* Stores with scaled offsets have different displacement ranges.  */
#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
#define TARGET_SPILL_CLASS arc_spill_class

#include "target-def.h"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

/* Try to keep the (mov:DF _, reg) as early as possible so
   that the d<add/sub/mul>h-lr insns appear together and can
   use the peephole2 pattern.  */

static int
arc_sched_adjust_priority (rtx_insn *insn, int priority)
{
  rtx set = single_set (insn);
  if (set
      && GET_MODE (SET_SRC (set)) == DFmode
      && GET_CODE (SET_SRC (set)) == REG)
    {
      /* Incrementing priority by 20 (empirically derived).  */
      return priority + 20;
    }

  return priority;
}

static reg_class_t
arc_secondary_reload (bool in_p, rtx x, reg_class_t cl, machine_mode,
		      secondary_reload_info *)
{
  if (cl == DOUBLE_REGS)
    return GENERAL_REGS;

  /* The loop counter register can be stored, but not loaded directly.  */
  if ((cl == LPCOUNT_REG || cl == WRITABLE_CORE_REGS)
      && in_p && MEM_P (x))
    return GENERAL_REGS;
  return NO_REGS;
}

static unsigned arc_ifcvt (void);

namespace {

const pass_data pass_data_arc_ifcvt =
{
  RTL_PASS,
  "arc_ifcvt",				/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  TV_IFCVT2,				/* tv_id */
  0,					/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_df_finish			/* todo_flags_finish */
};

class pass_arc_ifcvt : public rtl_opt_pass
{
public:
  pass_arc_ifcvt (gcc::context *ctxt)
  : rtl_opt_pass (pass_data_arc_ifcvt, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_arc_ifcvt (m_ctxt); }
  virtual unsigned int execute (function *) { return arc_ifcvt (); }
};

} // anon namespace

rtl_opt_pass *
make_pass_arc_ifcvt (gcc::context *ctxt)
{
  return new pass_arc_ifcvt (ctxt);
}

static unsigned arc_predicate_delay_insns (void);

namespace {

const pass_data pass_data_arc_predicate_delay_insns =
{
  RTL_PASS,
  "arc_predicate_delay_insns",		/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  TV_IFCVT2,				/* tv_id */
  0,					/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_df_finish			/* todo_flags_finish */
};

class pass_arc_predicate_delay_insns : public rtl_opt_pass
{
public:
  pass_arc_predicate_delay_insns (gcc::context *ctxt)
  : rtl_opt_pass (pass_data_arc_predicate_delay_insns, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      return arc_predicate_delay_insns ();
    }
};

} // anon namespace

rtl_opt_pass *
make_pass_arc_predicate_delay_insns (gcc::context *ctxt)
{
  return new pass_arc_predicate_delay_insns (ctxt);
}
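
/* Both pass factories above are invoked from arc_init below, which
   registers copies of the ifcvt pass after "dbr" and before "shorten",
   and the delay-insn predication pass after "dbr".  */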

/* Called by OVERRIDE_OPTIONS to initialize various things.  */

void
arc_init (void)
{
  enum attr_tune tune_dflt = TUNE_NONE;

  if (TARGET_A5)
    {
      arc_cpu_string = "A5";
    }
  else if (TARGET_ARC600)
    {
      arc_cpu_string = "ARC600";
      tune_dflt = TUNE_ARC600;
    }
  else if (TARGET_ARC601)
    {
      arc_cpu_string = "ARC601";
      tune_dflt = TUNE_ARC600;
    }
  else if (TARGET_ARC700)
    {
      arc_cpu_string = "ARC700";
      tune_dflt = TUNE_ARC700_4_2_STD;
    }
  else
    gcc_unreachable ();
  if (arc_tune == TUNE_NONE)
    arc_tune = tune_dflt;
  /* Note: arc_multcost is only used in rtx_cost if speed is true.  */
  if (arc_multcost < 0)
    switch (arc_tune)
      {
      case TUNE_ARC700_4_2_STD:
	/* latency 7;
	   max throughput (1 multiply + 4 other insns) / 5 cycles.  */
	arc_multcost = COSTS_N_INSNS (4);
	if (TARGET_NOMPY_SET)
	  arc_multcost = COSTS_N_INSNS (30);
	break;
      case TUNE_ARC700_4_2_XMAC:
	/* latency 5;
	   max throughput (1 multiply + 2 other insns) / 3 cycles.  */
	arc_multcost = COSTS_N_INSNS (3);
	if (TARGET_NOMPY_SET)
	  arc_multcost = COSTS_N_INSNS (30);
	break;
      case TUNE_ARC600:
	if (TARGET_MUL64_SET)
	  {
	    arc_multcost = COSTS_N_INSNS (4);
	    break;
	  }
	/* Fall through.  */
      default:
	arc_multcost = COSTS_N_INSNS (30);
	break;
      }

  /* Support mul64 generation only for A5 and ARC600.  */
  if (TARGET_MUL64_SET && TARGET_ARC700)
    error ("-mmul64 not supported for ARC700");

  /* MPY instructions valid only for ARC700.  */
  if (TARGET_NOMPY_SET && !TARGET_ARC700)
    error ("-mno-mpy supported only for ARC700");

  /* mul/mac instructions only for ARC600.  */
  if (TARGET_MULMAC_32BY16_SET && !(TARGET_ARC600 || TARGET_ARC601))
    error ("-mmul32x16 supported only for ARC600 or ARC601");

  if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR)
    error ("-mno-dpfp-lrsr supported only with -mdpfp");

  /* FPX-1. No fast and compact together.  */
  if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET)
      || (TARGET_SPFP_FAST_SET && TARGET_SPFP_COMPACT_SET))
    error ("FPX fast and compact options cannot be specified together");

  /* FPX-2. No fast-spfp for arc600 or arc601.  */
  if (TARGET_SPFP_FAST_SET && (TARGET_ARC600 || TARGET_ARC601))
    error ("-mspfp_fast not available on ARC600 or ARC601");

  /* FPX-3. No FPX extensions on pre-ARC600 cores.  */
  if ((TARGET_DPFP || TARGET_SPFP)
      && !(TARGET_ARC600 || TARGET_ARC601 || TARGET_ARC700))
    error ("FPX extensions not available on pre-ARC600 cores");

  /* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic.  */
  if (flag_pic && !TARGET_ARC700)
    {
      warning (DK_WARNING,
	       "PIC is not supported for %s.  Generating non-PIC code only.",
	       arc_cpu_string);
      flag_pic = 0;
    }

  arc_init_reg_tables ();

  /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P.  */
  memset (arc_punct_chars, 0, sizeof (arc_punct_chars));
  arc_punct_chars['#'] = 1;
  arc_punct_chars['*'] = 1;
  arc_punct_chars['?'] = 1;
  arc_punct_chars['!'] = 1;
  arc_punct_chars['^'] = 1;
  arc_punct_chars['&'] = 1;

  if (optimize > 1 && !TARGET_NO_COND_EXEC)
    {
      /* There are two target-independent ifcvt passes, and arc_reorg may do
	 one or more arc_ifcvt calls.  */
      opt_pass *pass_arc_ifcvt_4 = make_pass_arc_ifcvt (g);
      struct register_pass_info arc_ifcvt4_info
	= { pass_arc_ifcvt_4, "dbr", 1, PASS_POS_INSERT_AFTER };
      struct register_pass_info arc_ifcvt5_info
	= { pass_arc_ifcvt_4->clone (), "shorten", 1, PASS_POS_INSERT_BEFORE };

      register_pass (&arc_ifcvt4_info);
      register_pass (&arc_ifcvt5_info);
    }

  if (flag_delayed_branch)
    {
      opt_pass *pass_arc_predicate_delay_insns
	= make_pass_arc_predicate_delay_insns (g);
      struct register_pass_info arc_predicate_delay_info
	= { pass_arc_predicate_delay_insns, "dbr", 1, PASS_POS_INSERT_AFTER };

      register_pass (&arc_predicate_delay_info);
    }
}

/* Check ARC options, generate derived target attributes.  */

static void
arc_override_options (void)
{
  if (arc_cpu == PROCESSOR_NONE)
    arc_cpu = PROCESSOR_ARC700;

  if (arc_size_opt_level == 3)
    optimize_size = 1;

  if (flag_pic)
    target_flags |= MASK_NO_SDATA_SET;

  if (flag_no_common == 255)
    flag_no_common = !TARGET_NO_SDATA_SET;

  /* TARGET_COMPACT_CASESI needs the "q" register class.  */
  if (TARGET_MIXED_CODE)
    TARGET_Q_CLASS = 1;
  if (!TARGET_Q_CLASS)
    TARGET_COMPACT_CASESI = 0;
  if (TARGET_COMPACT_CASESI)
    TARGET_CASE_VECTOR_PC_RELATIVE = 1;

  /* These need to be done at start up.  It's convenient to do them here.  */
  arc_init ();
}

/* The condition codes of the ARC, and the inverse function.  */
/* For short branches, the "c" / "nc" names are not defined in the ARC
   Programmers manual, so we have to use "lo" / "hs" instead.  */
static const char *arc_condition_codes[] =
{
  "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv",
  "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0
};

enum arc_cc_code_index
{
  ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N,
  ARC_CC_C,  ARC_CC_NC, ARC_CC_V, ARC_CC_NV,
  ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ,
  ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC
};

#define ARC_INVERSE_CONDITION_CODE(X)  ((X) ^ 1)
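
/* The XOR trick works because arc_condition_codes pairs each condition
   with its inverse at adjacent even/odd indices, e.g. ARC_CC_EQ (2) ^ 1
   == ARC_CC_NE (3) and ARC_CC_LO (6) ^ 1 == ARC_CC_HS (7).  */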

/* Returns the index of the ARC condition code string in
   `arc_condition_codes'.  COMPARISON should be an rtx like
   `(eq (...) (...))'.  */

static int
get_arc_condition_code (rtx comparison)
{
  switch (GET_MODE (XEXP (comparison, 0)))
    {
    case CCmode:
    case SImode: /* For BRcc.  */
      switch (GET_CODE (comparison))
	{
	case EQ : return ARC_CC_EQ;
	case NE : return ARC_CC_NE;
	case GT : return ARC_CC_GT;
	case LE : return ARC_CC_LE;
	case GE : return ARC_CC_GE;
	case LT : return ARC_CC_LT;
	case GTU : return ARC_CC_HI;
	case LEU : return ARC_CC_LS;
	case LTU : return ARC_CC_LO;
	case GEU : return ARC_CC_HS;
	default : gcc_unreachable ();
	}
    case CC_ZNmode:
      switch (GET_CODE (comparison))
	{
	case EQ : return ARC_CC_EQ;
	case NE : return ARC_CC_NE;
	case GE : return ARC_CC_P;
	case LT : return ARC_CC_N;
	case GT : return ARC_CC_PNZ;
	default : gcc_unreachable ();
	}
    case CC_Zmode:
      switch (GET_CODE (comparison))
	{
	case EQ : return ARC_CC_EQ;
	case NE : return ARC_CC_NE;
	default : gcc_unreachable ();
	}
    case CC_Cmode:
      switch (GET_CODE (comparison))
	{
	case LTU : return ARC_CC_C;
	case GEU : return ARC_CC_NC;
	default : gcc_unreachable ();
	}
    case CC_FP_GTmode:
      if (TARGET_ARGONAUT_SET && TARGET_SPFP)
	switch (GET_CODE (comparison))
	  {
	  case GT  : return ARC_CC_N;
	  case UNLE: return ARC_CC_P;
	  default : gcc_unreachable ();
	  }
      else
	switch (GET_CODE (comparison))
	  {
	  case GT   : return ARC_CC_HI;
	  case UNLE : return ARC_CC_LS;
	  default : gcc_unreachable ();
	  }
    case CC_FP_GEmode:
      /* Same for FPX and non-FPX.  */
      switch (GET_CODE (comparison))
	{
	case GE   : return ARC_CC_HS;
	case UNLT : return ARC_CC_LO;
	default : gcc_unreachable ();
	}
    case CC_FP_UNEQmode:
      switch (GET_CODE (comparison))
	{
	case UNEQ : return ARC_CC_EQ;
	case LTGT : return ARC_CC_NE;
	default : gcc_unreachable ();
	}
    case CC_FP_ORDmode:
      switch (GET_CODE (comparison))
	{
	case UNORDERED : return ARC_CC_C;
	case ORDERED   : return ARC_CC_NC;
	default : gcc_unreachable ();
	}
    case CC_FPXmode:
      switch (GET_CODE (comparison))
	{
	case EQ        : return ARC_CC_EQ;
	case NE        : return ARC_CC_NE;
	case UNORDERED : return ARC_CC_C;
	case ORDERED   : return ARC_CC_NC;
	case LTGT      : return ARC_CC_HI;
	case UNEQ      : return ARC_CC_LS;
	default : gcc_unreachable ();
	}
    default : gcc_unreachable ();
    }
  /*NOTREACHED*/
  return (42);
}

/* Return true if COMPARISON has a short form that can accommodate OFFSET.  */

bool
arc_short_comparison_p (rtx comparison, int offset)
{
  gcc_assert (ARC_CC_NC == ARC_CC_HS);
  gcc_assert (ARC_CC_C == ARC_CC_LO);
  switch (get_arc_condition_code (comparison))
    {
    case ARC_CC_EQ: case ARC_CC_NE:
      return offset >= -512 && offset <= 506;
    case ARC_CC_GT: case ARC_CC_LE: case ARC_CC_GE: case ARC_CC_LT:
    case ARC_CC_HI: case ARC_CC_LS: case ARC_CC_LO: case ARC_CC_HS:
      return offset >= -64 && offset <= 58;
    default:
      return false;
    }
}

/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
   return the mode to be used for the comparison.  */

machine_mode
arc_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  machine_mode mode = GET_MODE (x);
  rtx x1;

  /* For an operation that sets the condition codes as a side-effect, the
     C and V flags are not set as for cmp, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? We could use "pnz" for greater than zero, however, we could then
     get into trouble because the comparison could not be reversed.  */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && y == const0_rtx
      && (op == EQ || op == NE
	  || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x)) <= 4)))
    return CC_ZNmode;

  /* add.f for if (a+b) */
  if (mode == SImode
      && GET_CODE (y) == NEG
      && (op == EQ || op == NE))
    return CC_ZNmode;

  /* Check if this is a test suitable for bxor.f .  */
  if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
      && ((INTVAL (y) - 1) & INTVAL (y)) == 0
      && INTVAL (y))
    return CC_Zmode;

  /* Check if this is a test suitable for add / bmsk.f .  */
  if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
      && GET_CODE (x) == AND && CONST_INT_P ((x1 = XEXP (x, 1)))
      && ((INTVAL (x1) + 1) & INTVAL (x1)) == 0
      && (~INTVAL (x1) | INTVAL (y)) < 0
      && (~INTVAL (x1) | INTVAL (y)) > -0x800)
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (TARGET_ARGONAUT_SET
      && ((mode == SFmode && TARGET_SPFP) || (mode == DFmode && TARGET_DPFP)))
    switch (op)
      {
      case EQ: case NE: case UNEQ: case LTGT: case ORDERED: case UNORDERED:
	return CC_FPXmode;
      case LT: case UNGE: case GT: case UNLE:
	return CC_FP_GTmode;
      case LE: case UNGT: case GE: case UNLT:
	return CC_FP_GEmode;
      default: gcc_unreachable ();
      }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_OPTFPE)
    switch (op)
      {
      case EQ: case NE: return CC_Zmode;
      case LT: case UNGE:
      case GT: case UNLE: return CC_FP_GTmode;
      case LE: case UNGT:
      case GE: case UNLT: return CC_FP_GEmode;
      case UNEQ: case LTGT: return CC_FP_UNEQmode;
      case ORDERED: case UNORDERED: return CC_FP_ORDmode;
      default: gcc_unreachable ();
      }

  return CCmode;
}
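
/* Worked examples of the selection above (informal): for SImode A,
   "A == 0" yields CC_ZNmode, since flag-setting side effects give
   reliable Z/N but not C/V; "A == 8" (a power of two) matches the
   bxor.f case and "(A & 7) == 4" the bmsk.f case, both yielding
   CC_Zmode.  */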

/* Vectors to keep interesting information about registers where it can easily
   be got.  We used to use the actual mode value as the bit number, but there
   is (or may be) more than 32 modes now.  Instead we use two tables: one
   indexed by hard register number, and one indexed by mode.  */

/* The purpose of arc_mode_class is to shrink the range of modes so that
   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
   mapped into one arc_mode_class mode.  */

enum arc_mode_class {
  C_MODE,
  S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  V_MODE
};

/* Modes for condition codes.  */
#define C_MODES (1 << (int) C_MODE)

/* Modes for single-word and smaller quantities.  */
#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))

/* Mode for 8-byte DF values only.  */
#define DF_MODES (1 << DF_MODE)

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for 128-bit vectors.  */
#define V_MODES (1 << (int) V_MODE)

/* Value is 1 if register/mode pair is acceptable on arc.  */

unsigned int arc_hard_regno_mode_ok[] = {
  T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
  T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
  T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES,
  D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,

  /* ??? Leave these as S_MODES for now.  */
  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
  DF_MODES, 0, DF_MODES, 0, S_MODES, S_MODES, S_MODES, S_MODES,
  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES, S_MODES,

  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,

  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,

  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES
};

unsigned int arc_mode_class [NUM_MACHINE_MODES];

enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];

enum reg_class
arc_preferred_reload_class (rtx, enum reg_class cl)
{
  if ((cl) == CHEAP_CORE_REGS  || (cl) == WRITABLE_CORE_REGS)
    return GENERAL_REGS;
  return cl;
}

/* Initialize the arc_mode_class array.  */

static void
arc_init_reg_tables (void)
{
  int i;

  for (i = 0; i < NUM_MACHINE_MODES; i++)
    {
      machine_mode m = (machine_mode) i;

      switch (GET_MODE_CLASS (m))
	{
	case MODE_INT:
	case MODE_PARTIAL_INT:
	case MODE_COMPLEX_INT:
	  if (GET_MODE_SIZE (m) <= 4)
	    arc_mode_class[i] = 1 << (int) S_MODE;
	  else if (GET_MODE_SIZE (m) == 8)
	    arc_mode_class[i] = 1 << (int) D_MODE;
	  else if (GET_MODE_SIZE (m) == 16)
	    arc_mode_class[i] = 1 << (int) T_MODE;
	  else if (GET_MODE_SIZE (m) == 32)
	    arc_mode_class[i] = 1 << (int) O_MODE;
	  else
	    arc_mode_class[i] = 0;
	  break;
	case MODE_FLOAT:
	case MODE_COMPLEX_FLOAT:
	  if (GET_MODE_SIZE (m) <= 4)
	    arc_mode_class[i] = 1 << (int) SF_MODE;
	  else if (GET_MODE_SIZE (m) == 8)
	    arc_mode_class[i] = 1 << (int) DF_MODE;
	  else if (GET_MODE_SIZE (m) == 16)
	    arc_mode_class[i] = 1 << (int) TF_MODE;
	  else if (GET_MODE_SIZE (m) == 32)
	    arc_mode_class[i] = 1 << (int) OF_MODE;
	  else
	    arc_mode_class[i] = 0;
	  break;
	case MODE_VECTOR_INT:
	  arc_mode_class[i] = (1 << (int) V_MODE);
	  break;
	case MODE_CC:
	default:
	  /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
	     we must explicitly check for them here.  */
	  if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode
	      || i == (int) CC_Cmode
	      || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode)
	    arc_mode_class[i] = 1 << (int) C_MODE;
	  else
	    arc_mode_class[i] = 0;
	  break;
	}
    }
}

/* Core registers 56..59 are used for multiply extension options.
   The dsp option uses r56 and r57, these are then named acc1 and acc2.
   acc1 is the highpart, and acc2 the lowpart, so which register gets which
   number depends on endianness.
   The mul64 multiplier options use r57 for mlo, r58 for mmid and r59 for mhi.
   Because mlo / mhi form a 64 bit value, we use different gcc internal
   register numbers to make them form a register pair as the gcc internals
   know it.  mmid gets number 57, if still available, and mlo / mhi get
   number 58 and 59, depending on endianness.  We use DBX_REGISTER_NUMBER
   to map this back.  */
char rname56[5] = "r56";
char rname57[5] = "r57";
char rname58[5] = "r58";
char rname59[5] = "r59";

static void
arc_conditional_register_usage (void)
{
  int regno;
  int i;
  int fix_start = 60, fix_end = 55;

  if (TARGET_MUL64_SET)
    {
      fix_start = 57;
      fix_end = 59;

      /* We don't provide a name for mmid.  In rtl / assembly resource lists,
	 you are supposed to refer to it as mlo & mhi, e.g.
	 (zero_extract:SI (reg:DI 58) (const_int 32) (16)) .
	 In an actual asm instruction, you of course use mmid.
	 The point of avoiding having a separate register for mmid is that
	 this way, we don't have to carry clobbers of that reg around in every
	 instruction that modifies mlo and/or mhi.  */
      strcpy (rname57, "");
      strcpy (rname58, TARGET_BIG_ENDIAN ? "mhi" : "mlo");
      strcpy (rname59, TARGET_BIG_ENDIAN ? "mlo" : "mhi");
    }
  if (TARGET_MULMAC_32BY16_SET)
    {
      fix_start = 56;
      fix_end = fix_end > 57 ? fix_end : 57;
      strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2");
      strcpy (rname57, TARGET_BIG_ENDIAN ? "acc2" : "acc1");
    }
  for (regno = fix_start; regno <= fix_end; regno++)
    {
      if (!fixed_regs[regno])
	warning (0, "multiply option implies r%d is fixed", regno);
      fixed_regs[regno] = call_used_regs[regno] = 1;
    }
  if (TARGET_Q_CLASS)
    {
      reg_alloc_order[2] = 12;
      reg_alloc_order[3] = 13;
      reg_alloc_order[4] = 14;
      reg_alloc_order[5] = 15;
      reg_alloc_order[6] = 1;
      reg_alloc_order[7] = 0;
      reg_alloc_order[8] = 4;
      reg_alloc_order[9] = 5;
      reg_alloc_order[10] = 6;
      reg_alloc_order[11] = 7;
      reg_alloc_order[12] = 8;
      reg_alloc_order[13] = 9;
      reg_alloc_order[14] = 10;
      reg_alloc_order[15] = 11;
    }
  if (TARGET_SIMD_SET)
    {
      int i;
      for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
	reg_alloc_order[i] = i;
      for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG;
	   i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
	reg_alloc_order[i] = i;
    }
  /* For Arctangent-A5 / ARC600, lp_count may not be read in an instruction
     following immediately after another one setting it to a new value.
     There was some discussion on how to enforce scheduling constraints for
     processors with missing interlocks on the gcc mailing list:
     http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html .
     However, we can't actually use this approach, because for ARC the
     delay slot scheduling pass is active, which runs after
     machine_dependent_reorg.  */
  if (TARGET_ARC600)
    CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
  else if (!TARGET_ARC700)
    fixed_regs[LP_COUNT] = 1;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!call_used_regs[regno])
      CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
  for (regno = 32; regno < 60; regno++)
    if (!fixed_regs[regno])
      SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], regno);
  if (TARGET_ARC700)
    {
      for (regno = 32; regno <= 60; regno++)
	CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], regno);

      /* If they have used -ffixed-lp_count, make sure it takes
	 effect.  */
      if (fixed_regs[LP_COUNT])
	{
	  CLEAR_HARD_REG_BIT (reg_class_contents[LPCOUNT_REG], LP_COUNT);
	  CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
	  CLEAR_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], LP_COUNT);

	  /* Instead of taking out SF_MODE like below, forbid it outright.  */
	  arc_hard_regno_mode_ok[60] = 0;
	}
      else
	arc_hard_regno_mode_ok[60] = 1 << (int) S_MODE;
    }

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (i < 29)
	{
	  if (TARGET_Q_CLASS && ((i <= 3) || ((i >= 12) && (i <= 15))))
	    arc_regno_reg_class[i] = ARCOMPACT16_REGS;
	  else
	    arc_regno_reg_class[i] = GENERAL_REGS;
	}
      else if (i < 60)
	arc_regno_reg_class[i]
	  = (fixed_regs[i]
	     ? (TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i)
		? CHEAP_CORE_REGS : ALL_CORE_REGS)
	     : ((TARGET_ARC700
		 && TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i))
		? CHEAP_CORE_REGS : WRITABLE_CORE_REGS));
      else
	arc_regno_reg_class[i] = NO_REGS;
    }

  /* ARCOMPACT16_REGS is empty, if TARGET_Q_CLASS has not been activated.  */
  if (!TARGET_Q_CLASS)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[ARCOMPACT16_REGS]);
      CLEAR_HARD_REG_SET (reg_class_contents[AC16_BASE_REGS]);
    }

  gcc_assert (FIRST_PSEUDO_REGISTER >= 144);

  /* Handle Special Registers.  */
  arc_regno_reg_class[29] = LINK_REGS; /* ilink1 register.  */
  arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register.  */
  arc_regno_reg_class[31] = LINK_REGS; /* blink register.  */
  arc_regno_reg_class[60] = LPCOUNT_REG;
  arc_regno_reg_class[61] = NO_REGS;      /* CC_REG: must be NO_REGS.  */
  arc_regno_reg_class[62] = GENERAL_REGS;

  if (TARGET_DPFP)
    {
      for (i = 40; i < 44; ++i)
	{
	  arc_regno_reg_class[i] = DOUBLE_REGS;

	  /* Unless they want us to do 'mov d1, 0x00000000', make sure
	     no attempt is made to use such a register as a destination
	     operand in *movdf_insn.  */
	  if (!TARGET_ARGONAUT_SET)
	    {
	      /* Make sure no 'c', 'w', 'W', or 'Rac' constraint is
		 interpreted to mean they can use D1 or D2 in their insn.  */
	      CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS       ], i);
	      CLEAR_HARD_REG_BIT (reg_class_contents[ALL_CORE_REGS         ], i);
	      CLEAR_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS    ], i);
	      CLEAR_HARD_REG_BIT (reg_class_contents[MPY_WRITABLE_CORE_REGS], i);
	    }
	}
    }
  else
    {
      /* Disable all DOUBLE_REGISTER settings,
	 if not generating DPFP code.  */
      arc_regno_reg_class[40] = ALL_REGS;
      arc_regno_reg_class[41] = ALL_REGS;
      arc_regno_reg_class[42] = ALL_REGS;
      arc_regno_reg_class[43] = ALL_REGS;

      arc_hard_regno_mode_ok[40] = 0;
      arc_hard_regno_mode_ok[42] = 0;

      CLEAR_HARD_REG_SET (reg_class_contents[DOUBLE_REGS]);
    }

  if (TARGET_SIMD_SET)
    {
      gcc_assert (ARC_FIRST_SIMD_VR_REG == 64);
      gcc_assert (ARC_LAST_SIMD_VR_REG  == 127);

      for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
	arc_regno_reg_class[i] = SIMD_VR_REGS;

      gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_REG == 128);
      gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_IN_REG == 128);
      gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG == 136);
      gcc_assert (ARC_LAST_SIMD_DMA_CONFIG_REG  == 143);

      for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG;
	   i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
	arc_regno_reg_class[i] = SIMD_DMA_CONFIG_REGS;
    }

  /* pc : r63 */
  arc_regno_reg_class[PROGRAM_COUNTER_REGNO] = GENERAL_REGS;
}

/* Handle an "interrupt" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
arc_handle_interrupt_attribute (tree *, tree name, tree args, int,
				bool *no_add_attrs)
{
  gcc_assert (args);

  tree value = TREE_VALUE (args);

  if (TREE_CODE (value) != STRING_CST)
    {
      warning (OPT_Wattributes,
	       "argument of %qE attribute is not a string constant",
	       name);
      *no_add_attrs = true;
    }
  else if (strcmp (TREE_STRING_POINTER (value), "ilink1")
	   && strcmp (TREE_STRING_POINTER (value), "ilink2"))
    {
      warning (OPT_Wattributes,
	       "argument of %qE attribute is not \"ilink1\" or \"ilink2\"",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
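
/* Illustrative only - a handler declaration accepted by the checks
   above (the function name is hypothetical):

     void my_isr (void) __attribute__ ((interrupt ("ilink1")));  */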

/* Return zero if TYPE1 and TYPE2 are incompatible, one if they are compatible,
   and two if they are nearly compatible (which causes a warning to be
   generated).  */

static int
arc_comp_type_attributes (const_tree type1,
			  const_tree type2)
{
  int l1, l2, m1, m2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  m1 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type1)) != NULL;
  m2 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | m1 | m2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (m1 != m2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if (l1 + m1 + s1 > 1)
	return 0;
    }

  return 1;
}

/* Set the default attributes for TYPE.  */

void
arc_set_default_type_attributes (tree type ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}

/* Misc. utilities.  */

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for the cc reg in the proper mode.  */

rtx
gen_compare_reg (rtx comparison, machine_mode omode)
{
  enum rtx_code code = GET_CODE (comparison);
  rtx x = XEXP (comparison, 0);
  rtx y = XEXP (comparison, 1);
  rtx tmp, cc_reg;
  machine_mode mode, cmode;

  cmode = GET_MODE (x);
  if (cmode == VOIDmode)
    cmode = GET_MODE (y);
  gcc_assert (cmode == SImode || cmode == SFmode || cmode == DFmode);
  if (cmode == SImode)
    {
      if (!register_operand (x, SImode))
	{
	  if (register_operand (y, SImode))
	    {
	      tmp = x;
	      x = y;
	      y = tmp;
	      code = swap_condition (code);
	    }
	  else
	    x = copy_to_mode_reg (SImode, x);
	}
      if (GET_CODE (y) == SYMBOL_REF && flag_pic)
	y = copy_to_mode_reg (SImode, y);
    }
  else
    {
      x = force_reg (cmode, x);
      y = force_reg (cmode, y);
    }
  mode = SELECT_CC_MODE (code, x, y);

  cc_reg = gen_rtx_REG (mode, CC_REG);

  /* ??? FIXME (x-y)==0, as done by both cmpsfpx_raw and
     cmpdfpx_raw, is not a correct comparison for floats:
        http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
   */
  if (TARGET_ARGONAUT_SET
      && ((cmode == SFmode && TARGET_SPFP) || (cmode == DFmode && TARGET_DPFP)))
    {
      switch (code)
	{
	case NE: case EQ: case LT: case UNGE: case LE: case UNGT:
	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
	  break;
	case GT: case UNLE: case GE: case UNLT:
	  code = swap_condition (code);
	  tmp = x;
	  x = y;
	  y = tmp;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (cmode == SFmode)
	{
	  emit_insn (gen_cmpsfpx_raw (x, y));
	}
      else /* DFmode */
	{
	  /* Accepts Dx regs directly by insns.  */
	  emit_insn (gen_cmpdfpx_raw (x, y));
	}

      if (mode != CC_FPXmode)
	emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
				gen_rtx_COMPARE (mode,
						 gen_rtx_REG (CC_FPXmode, 61),
						 const0_rtx)));
    }
  else if (GET_MODE_CLASS (cmode) == MODE_FLOAT && TARGET_OPTFPE)
    {
      rtx op0 = gen_rtx_REG (cmode, 0);
      rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD);

      switch (code)
	{
	case NE: case EQ: case GT: case UNLE: case GE: case UNLT:
	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
	  break;
	case LT: case UNGE: case LE: case UNGT:
	  code = swap_condition (code);
	  tmp = x;
	  x = y;
	  y = tmp;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (currently_expanding_to_rtl)
	{
	  emit_move_insn (op0, x);
	  emit_move_insn (op1, y);
	}
      else
	{
	  gcc_assert (rtx_equal_p (op0, x));
	  gcc_assert (rtx_equal_p (op1, y));
	}
      emit_insn (gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1)));
    }
  else
    emit_insn (gen_rtx_SET (omode, cc_reg,
			    gen_rtx_COMPARE (mode, x, y)));
  return gen_rtx_fmt_ee (code, omode, cc_reg, const0_rtx);
}
1627 
1628 /* Return true if VALUE, a const_double, will fit in a limm (4 byte number).
1629    We assume the value can be either signed or unsigned.  */
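/* For example (ignoring the TARGET_DPFP shortcut below, which accepts
   anything): 0xffffffff and -2^31 both fit and yield true, while 2^32 or
   -2^31 - 1 need more than 32 bits and yield false.  */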
1630 
1631 bool
1632 arc_double_limm_p (rtx value)
1633 {
1634   HOST_WIDE_INT low, high;
1635 
1636   gcc_assert (GET_CODE (value) == CONST_DOUBLE);
1637 
1638   if (TARGET_DPFP)
1639     return true;
1640 
1641   low = CONST_DOUBLE_LOW (value);
1642   high = CONST_DOUBLE_HIGH (value);
1643 
1644   if (low & 0x80000000)
1645     {
1646       return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0)
1647 	      || (((low & - (unsigned HOST_WIDE_INT) 0x80000000)
1648 		   == - (unsigned HOST_WIDE_INT) 0x80000000)
1649 		  && high == -1));
1650     }
1651   else
1652     {
1653       return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0;
1654     }
1655 }
1656 
1657 /* Do any needed setup for a variadic function.  For the ARC, we must
1658    create a register parameter block, and then copy any anonymous arguments
1659    in registers to memory.
1660 
1661    CUM has not been updated for the last named argument which has type TYPE
1662    and mode MODE, and we rely on this fact.  */
1663 
1664 static void
1665 arc_setup_incoming_varargs (cumulative_args_t args_so_far,
1666 			    machine_mode mode, tree type,
1667 			    int *pretend_size, int no_rtl)
1668 {
1669   int first_anon_arg;
1670   CUMULATIVE_ARGS next_cum;
1671 
1672   /* We must treat `__builtin_va_alist' as an anonymous arg.  */
1673 
1674   next_cum = *get_cumulative_args (args_so_far);
1675   arc_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
1676   first_anon_arg = next_cum;
1677 
1678   if (first_anon_arg < MAX_ARC_PARM_REGS)
1679     {
1680       /* First anonymous (unnamed) argument is in a reg.  */
1681 
1682       /* Note that first_reg_offset < MAX_ARC_PARM_REGS.  */
1683       int first_reg_offset = first_anon_arg;
1684 
1685       if (!no_rtl)
1686 	{
1687 	  rtx regblock
1688 	    = gen_rtx_MEM (BLKmode, plus_constant (Pmode, arg_pointer_rtx,
1689 			   FIRST_PARM_OFFSET (0)));
1690 	  move_block_from_reg (first_reg_offset, regblock,
1691 			       MAX_ARC_PARM_REGS - first_reg_offset);
1692 	}
1693 
1694       *pretend_size
1695 	= ((MAX_ARC_PARM_REGS - first_reg_offset ) * UNITS_PER_WORD);
1696     }
1697 }
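/* For example, in a variadic function whose named arguments use up r0 and
   r1, FIRST_ANON_ARG is 2, the registers from r2 up to the last parameter
   register are dumped just below the incoming argument pointer, and
   *PRETEND_SIZE becomes (MAX_ARC_PARM_REGS - 2) * UNITS_PER_WORD bytes.  */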
1698 
1699 /* Cost functions.  */
1700 
1701 /* Provide the costs of an addressing mode that contains ADDR.
1702    If ADDR is not a valid address, its cost is irrelevant.  */
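/* For example: a plain register address costs 0 when optimizing for speed,
   an auto-increment address costs 1 only when optimizing for size, and a
   symbolic address - which most likely needs a LIMM - costs
   COSTS_N_INSNS (1).  */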
1703 
1704 int
1705 arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed)
1706 {
1707   switch (GET_CODE (addr))
1708     {
1709     case REG :
1710       return speed || satisfies_constraint_Rcq (addr) ? 0 : 1;
1711     case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC:
1712     case PRE_MODIFY: case POST_MODIFY:
1713       return !speed;
1714 
1715     case LABEL_REF :
1716     case SYMBOL_REF :
1717     case CONST :
1718       /* Most likely needs a LIMM.  */
1719       return COSTS_N_INSNS (1);
1720 
1721     case PLUS :
1722       {
1723 	register rtx plus0 = XEXP (addr, 0);
1724 	register rtx plus1 = XEXP (addr, 1);
1725 
1726 	if (GET_CODE (plus0) != REG
1727 	    && (GET_CODE (plus0) != MULT
1728 		|| !CONST_INT_P (XEXP (plus0, 1))
1729 		|| (INTVAL (XEXP (plus0, 1)) != 2
1730 		    && INTVAL (XEXP (plus0, 1)) != 4)))
1731 	  break;
1732 
1733 	switch (GET_CODE (plus1))
1734 	  {
1735 	  case CONST_INT :
1736 	    return (!RTX_OK_FOR_OFFSET_P (SImode, plus1)
1737 		    ? COSTS_N_INSNS (1)
1738 		    : speed
1739 		    ? 0
1740 		    : (satisfies_constraint_Rcq (plus0)
1741 		       && satisfies_constraint_O (plus1))
1742 		    ? 0
1743 		    : 1);
1744 	  case REG:
1745 	    return (speed < 1 ? 0
1746 		    : (satisfies_constraint_Rcq (plus0)
1747 		       && satisfies_constraint_Rcq (plus1))
1748 		    ? 0 : 1);
1749 	  case CONST :
1750 	  case SYMBOL_REF :
1751 	  case LABEL_REF :
1752 	    return COSTS_N_INSNS (1);
1753 	  default:
1754 	    break;
1755 	  }
1756 	break;
1757       }
1758     default:
1759       break;
1760     }
1761 
1762   return 4;
1763 }
1764 
1765 /* Emit instruction X with the frame related bit set.  */
1766 
1767 static rtx
1768 frame_insn (rtx x)
1769 {
1770   x = emit_insn (x);
1771   RTX_FRAME_RELATED_P (x) = 1;
1772   return x;
1773 }
1774 
1775 /* Emit a frame insn to move SRC to DST.  */
1776 
1777 static rtx
1778 frame_move (rtx dst, rtx src)
1779 {
1780   return frame_insn (gen_rtx_SET (VOIDmode, dst, src));
1781 }
1782 
1783 /* Like frame_move, but add a REG_INC note for REG if ADDR contains an
1784    auto-increment address, or if ADDR is zero.  */
1785 
1786 static rtx
1787 frame_move_inc (rtx dst, rtx src, rtx reg, rtx addr)
1788 {
1789   rtx insn = frame_move (dst, src);
1790 
1791   if (!addr
1792       || GET_CODE (addr) == PRE_DEC || GET_CODE (addr) == POST_INC
1793       || GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY)
1794     add_reg_note (insn, REG_INC, reg);
1795   return insn;
1796 }
1797 
1798 /* Emit a frame insn which adjusts a frame address register REG by OFFSET.  */
1799 
1800 static rtx
1801 frame_add (rtx reg, HOST_WIDE_INT offset)
1802 {
1803   gcc_assert ((offset & 0x3) == 0);
1804   if (!offset)
1805     return NULL_RTX;
1806   return frame_move (reg, plus_constant (Pmode, reg, offset));
1807 }
1808 
1809 /* Emit a frame insn which adjusts stack pointer by OFFSET.  */
1810 
1811 static rtx
1812 frame_stack_add (HOST_WIDE_INT offset)
1813 {
1814   return frame_add (stack_pointer_rtx, offset);
1815 }
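/* For example, frame_stack_add (-16) emits
   (set sp (plus sp (const_int -16))) with RTX_FRAME_RELATED_P set, while
   frame_stack_add (0) emits nothing and returns NULL_RTX.  */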
1816 
1817 /* Traditionally, we push saved registers first in the prologue,
1818    then we allocate the rest of the frame - and reverse in the epilogue.
1819    This still has its merits for ease of debugging, or saving code size
1820    or even execution time if the stack frame is so large that some accesses
1821    can't be encoded anymore with offsets in the instruction code when using
1822    a different scheme.
1823    Also, it would be a good starting point if we got instructions to help
1824    with register save/restore.
1825 
1826    However, often stack frames are small, and the pushing / popping has
1827    some costs:
1828    - the stack modification prevents a lot of scheduling.
1829    - frame allocation / deallocation needs extra instructions.
1830    - unless we know that we compile ARC700 user code, we need to put
1831      a memory barrier after frame allocation / before deallocation to
1832      prevent interrupts clobbering our data in the frame.
1833      In particular, we don't have any such guarantees for library functions,
1834      which, on the other hand, tend to have small frames.
1835 
1836    Thus, for small frames, we'd like to use a different scheme:
1837    - The frame is allocated in full with the first prologue instruction,
1838      and deallocated in full with the last epilogue instruction.
1839      Thus, the instructions in-between can be freely scheduled.
1840    - If the function has no outgoing arguments on the stack, we can allocate
1841      one register save slot at the top of the stack.  This register can then
1842      be saved simultaneously with frame allocation, and restored with
1843      frame deallocation.
1844      This register can be picked depending on scheduling considerations,
1845      although some thought should go into having some set of registers
1846      to be potentially lingering after a call, and others to be available
1847      immediately - i.e. in the absence of interprocedural optimization, we
1848      can use an ABI-like convention for register allocation to reduce
1849      stalls after function return.  */
1850 /* Function prologue/epilogue handlers.  */
1851 
1852 /* ARCompact stack frames look like:
1853 
1854            Before call                     After call
1855   high  +-----------------------+       +-----------------------+
1856   mem   |  reg parm save area   |       | reg parm save area    |
1857         |  only created for     |       | only created for      |
1858         |  variable arg fns     |       | variable arg fns      |
1859     AP  +-----------------------+       +-----------------------+
1860         |  return addr register |       | return addr register  |
1861         |  (if required)        |       | (if required)         |
1862         +-----------------------+       +-----------------------+
1863         |                       |       |                       |
1864         |  reg save area        |       | reg save area         |
1865         |                       |       |                       |
1866         +-----------------------+       +-----------------------+
1867         |  frame pointer        |       | frame pointer         |
1868         |  (if required)        |       | (if required)         |
1869     FP  +-----------------------+       +-----------------------+
1870         |                       |       |                       |
1871         |  local/temp variables |       | local/temp variables  |
1872         |                       |       |                       |
1873         +-----------------------+       +-----------------------+
1874         |                       |       |                       |
1875         |  arguments on stack   |       | arguments on stack    |
1876         |                       |       |                       |
1877     SP  +-----------------------+       +-----------------------+
1878                                         | reg parm save area    |
1879                                         | only created for      |
1880                                         | variable arg fns      |
1881                                     AP  +-----------------------+
1882                                         | return addr register  |
1883                                         | (if required)         |
1884                                         +-----------------------+
1885                                         |                       |
1886                                         | reg save area         |
1887                                         |                       |
1888                                         +-----------------------+
1889                                         | frame pointer         |
1890                                         | (if required)         |
1891                                     FP  +-----------------------+
1892                                         |                       |
1893                                         | local/temp variables  |
1894                                         |                       |
1895                                         +-----------------------+
1896                                         |                       |
1897                                         | arguments on stack    |
1898   low                                   |                       |
1899   mem                               SP  +-----------------------+
1900 
1901 Notes:
1902 1) The "reg parm save area" does not exist for non-variable-argument fns.
1903    The "reg parm save area" can be eliminated completely if we created our
1904    own va-arc.h, but that has tradeoffs as well (so it's not done).  */
1905 
1906 /* Structure to be filled in by arc_compute_frame_size with register
1907    save masks, and offsets for the current function.  */
1908 struct GTY (()) arc_frame_info
1909 {
1910   unsigned int total_size;	/* # bytes that the entire frame takes up.  */
1911   unsigned int extra_size;	/* # bytes of extra stuff.  */
1912   unsigned int pretend_size;	/* # bytes we push and pretend caller did.  */
1913   unsigned int args_size;	/* # bytes that outgoing arguments take up.  */
1914   unsigned int reg_size;	/* # bytes needed to store regs.  */
1915   unsigned int var_size;	/* # bytes that variables take up.  */
1916   unsigned int reg_offset;	/* Offset from new sp to store regs.  */
1917   unsigned int gmask;		/* Mask of saved gp registers.  */
1918   int          initialized;	/* Nonzero if frame size already calculated.  */
1919   short millicode_start_reg;
1920   short millicode_end_reg;
1921   bool save_return_addr;
1922 };
1923 
1924 /* Defining data structures for per-function information.  */
1925 
1926 typedef struct GTY (()) machine_function
1927 {
1928   enum arc_function_type fn_type;
1929   struct arc_frame_info frame_info;
1930   /* To keep track of unalignment caused by short insns.  */
1931   int unalign;
1932   int force_short_suffix; /* Used when disgorging return delay slot insns.  */
1933   const char *size_reason;
1934   struct arc_ccfsm ccfsm_current;
1935   /* Map from uid to ccfsm state during branch shortening.  */
1936   rtx ccfsm_current_insn;
1937   char arc_reorg_started;
1938   char prescan_initialized;
1939 } machine_function;
1940 
1941 /* Type of function DECL.
1942 
1943    The result is cached.  To reset the cache at the end of a function,
1944    call with DECL = NULL_TREE.  */
1945 
1946 enum arc_function_type
1947 arc_compute_function_type (struct function *fun)
1948 {
1949   tree decl = fun->decl;
1950   tree a;
1951   enum arc_function_type fn_type = fun->machine->fn_type;
1952 
1953   if (fn_type != ARC_FUNCTION_UNKNOWN)
1954     return fn_type;
1955 
1956   /* Assume we have a normal function (not an interrupt handler).  */
1957   fn_type = ARC_FUNCTION_NORMAL;
1958 
1959   /* Now see if this is an interrupt handler.  */
1960   for (a = DECL_ATTRIBUTES (decl);
1961        a;
1962        a = TREE_CHAIN (a))
1963     {
1964       tree name = TREE_PURPOSE (a), args = TREE_VALUE (a);
1965 
1966       if (name == get_identifier ("interrupt")
1967 	  && list_length (args) == 1
1968 	  && TREE_CODE (TREE_VALUE (args)) == STRING_CST)
1969 	{
1970 	  tree value = TREE_VALUE (args);
1971 
1972 	  if (!strcmp (TREE_STRING_POINTER (value), "ilink1"))
1973 	    fn_type = ARC_FUNCTION_ILINK1;
1974 	  else if (!strcmp (TREE_STRING_POINTER (value), "ilink2"))
1975 	    fn_type = ARC_FUNCTION_ILINK2;
1976 	  else
1977 	    gcc_unreachable ();
1978 	  break;
1979 	}
1980     }
1981 
1982   return fun->machine->fn_type = fn_type;
1983 }
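/* For example, a handler declared as
     void __attribute__ ((interrupt ("ilink1"))) isr (void);
   is classified as ARC_FUNCTION_ILINK1, "ilink2" yields
   ARC_FUNCTION_ILINK2, and a function without the attribute is
   ARC_FUNCTION_NORMAL.  */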
1984 
1985 #define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
1986 #define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
1987 
1988 /* Tell prologue and epilogue if register REGNO should be saved / restored.
1989    The return address and frame pointer are treated separately.
1990    Don't consider them here.
1991    Addition for pic: The gp register needs to be saved if the current
1992    function changes it to access gotoff variables.
1993    FIXME: This will not be needed if we used some arbitrary register
1994    instead of r26.
1995 */
1996 #define MUST_SAVE_REGISTER(regno, interrupt_p) \
1997 (((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \
1998   && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p))) \
1999  || (flag_pic && crtl->uses_pic_offset_table \
2000      && regno == PIC_OFFSET_TABLE_REGNUM) )
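/* For example: a live call-saved register (r13-r25 under the usual ARC
   convention) is always saved; a live call-clobbered register such as r12
   is saved only for interrupt handlers; and the pic register is saved
   whenever PIC code uses the pic offset table.  */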
2001 
2002 #define MUST_SAVE_RETURN_ADDR \
2003   (cfun->machine->frame_info.save_return_addr)
2004 
2005 /* Return non-zero if there are registers to be saved or loaded using
2006    millicode thunks.  We can only use consecutive sequences starting
2007    with r13, and not going beyond r25.
2008    GMASK is a bitmask of registers to save.  This function sets
2009    FRAME->millicode_start_reg .. FRAME->millicode_end_reg to the range
2010    of registers to be saved / restored with a millicode call.  */
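/* For example, a GMASK covering r13-r16 yields a four-register thunk range;
   r13-r14 alone is only worthwhile in a non-leaf function, where the blink
   restore lowers the three-register threshold by one.  */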
2011 
2012 static int
2013 arc_compute_millicode_save_restore_regs (unsigned int gmask,
2014 					 struct arc_frame_info *frame)
2015 {
2016   int regno;
2017 
2018   int start_reg = 13, end_reg = 25;
2019 
2020   for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));)
2021     regno++;
2022   end_reg = regno - 1;
2023   /* There is no point in using millicode thunks if we don't save/restore
2024      at least three registers.  For non-leaf functions we also have the
2025      blink restore.  */
2026   if (regno - start_reg >= 3 - (crtl->is_leaf == 0))
2027     {
2028       frame->millicode_start_reg = 13;
2029       frame->millicode_end_reg = regno - 1;
2030       return 1;
2031     }
2032   return 0;
2033 }
2034 
2035 /* Return the bytes needed to compute the frame pointer from the current
2036    stack pointer.
2037 
2038    SIZE is the size needed for local variables.  */
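/* A worked example, assuming 4-byte stack alignment: 16 bytes of locals,
   no outgoing args, blink and fp saved (extra_size = 8) and r13/r14 saved
   (reg_size = 8) give
     total_size = 16 + 0 + 8 + 0 + 8 = 32
     reg_offset = 32 - (0 + 8 + 8) + 4 = 20.  */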
2039 
2040 unsigned int
2041 arc_compute_frame_size (int size)	/* size = # of var. bytes allocated.  */
2042 {
2043   int regno;
2044   unsigned int total_size, var_size, args_size, pretend_size, extra_size;
2045   unsigned int reg_size, reg_offset;
2046   unsigned int gmask;
2047   enum arc_function_type fn_type;
2048   int interrupt_p;
2049   struct arc_frame_info *frame_info = &cfun->machine->frame_info;
2050 
2051   size = ARC_STACK_ALIGN (size);
2052 
2053   /* 1) Size of locals and temporaries */
2054   var_size	= size;
2055 
2056   /* 2) Size of outgoing arguments */
2057   args_size	= crtl->outgoing_args_size;
2058 
2059   /* 3) Calculate space needed for saved registers.
2060      ??? We ignore the extension registers for now.  */
2061 
2062   /* See if this is an interrupt handler.  Call used registers must be saved
2063      for them too.  */
2064 
2065   reg_size = 0;
2066   gmask = 0;
2067   fn_type = arc_compute_function_type (cfun);
2068   interrupt_p = ARC_INTERRUPT_P (fn_type);
2069 
2070   for (regno = 0; regno <= 31; regno++)
2071     {
2072       if (MUST_SAVE_REGISTER (regno, interrupt_p))
2073 	{
2074 	  reg_size += UNITS_PER_WORD;
2075 	  gmask |= 1 << regno;
2076 	}
2077     }
2078 
2079   /* 4) Space for back trace data structure.
2080 	<return addr reg size> (if required) + <fp size> (if required).  */
2081   frame_info->save_return_addr
2082     = (!crtl->is_leaf || df_regs_ever_live_p (RETURN_ADDR_REGNUM));
2083   /* Millicode thunk calls clobber blink, so save it if they may be used.  */
2084   if (optimize_size && !TARGET_NO_MILLICODE_THUNK_SET)
2085     {
2086       if (arc_compute_millicode_save_restore_regs (gmask, frame_info))
2087 	frame_info->save_return_addr = true;
2088     }
2089 
2090   extra_size = 0;
2091   if (MUST_SAVE_RETURN_ADDR)
2092     extra_size = 4;
2093   if (frame_pointer_needed)
2094     extra_size += 4;
2095 
2096   /* 5) Space for variable arguments passed in registers */
2097   pretend_size	= crtl->args.pretend_args_size;
2098 
2099   /* Ensure everything before the locals is aligned appropriately.  */
2100     {
2101       unsigned int extra_plus_reg_size;
2102       unsigned int extra_plus_reg_size_aligned;
2103 
2104       extra_plus_reg_size = extra_size + reg_size;
2105       extra_plus_reg_size_aligned = ARC_STACK_ALIGN (extra_plus_reg_size);
2106       reg_size = extra_plus_reg_size_aligned - extra_size;
2107     }
2108 
2109   /* Compute total frame size.  */
2110   total_size = var_size + args_size + extra_size + pretend_size + reg_size;
2111 
2112   total_size = ARC_STACK_ALIGN (total_size);
2113 
2114   /* Compute offset of register save area from stack pointer:
2115      A5 Frame: pretend_size <blink> reg_size <fp> var_size args_size <--sp
2116   */
2117   reg_offset = (total_size - (pretend_size + reg_size + extra_size)
2118 		+ (frame_pointer_needed ? 4 : 0));
2119 
2120   /* Save computed information.  */
2121   frame_info->total_size   = total_size;
2122   frame_info->extra_size   = extra_size;
2123   frame_info->pretend_size = pretend_size;
2124   frame_info->var_size     = var_size;
2125   frame_info->args_size    = args_size;
2126   frame_info->reg_size     = reg_size;
2127   frame_info->reg_offset   = reg_offset;
2128   frame_info->gmask        = gmask;
2129   frame_info->initialized  = reload_completed;
2130 
2131   /* Ok, we're done.  */
2132   return total_size;
2133 }
2134 
2135 /* Common code to save/restore registers.  */
2136 /* BASE_REG is the base register to use for addressing and to adjust.
2137    GMASK is a bitmask of general purpose registers to save/restore.
2138    EPILOGUE_P is 0 for a prologue, 1 for an epilogue, 2 for an epilogue
   with a sibling thunk.
2139    If *FIRST_OFFSET is non-zero, add it first to BASE_REG - preferably
2140    using a pre-modify for the first memory access.  *FIRST_OFFSET is then
2141    zeroed.  */
2142 
2143 static void
2144 arc_save_restore (rtx base_reg,
2145 		  unsigned int gmask, int epilogue_p, int *first_offset)
2146 {
2147   unsigned int offset = 0;
2148   int regno;
2149   struct arc_frame_info *frame = &cfun->machine->frame_info;
2150   rtx sibthunk_insn = NULL_RTX;
2151 
2152   if (gmask)
2153     {
2154       /* Millicode thunks implementation:
2155 	 Generates calls to millicodes for registers starting from r13 to r25
2156 	 Present Limitations:
2157 	 - Only one range supported. The remaining regs will have the ordinary
2158 	   st and ld instructions for store and loads. Hence a gmask asking
2159 	   to store r13-14, r16-r25 will only generate calls to store and
2160 	   load r13 to r14 while store and load insns will be generated for
2161 	   r16 to r25 in the prologue and epilogue respectively.
2162 
2163 	 - Presently library only supports register ranges starting from r13.
2164       */
2165       if (epilogue_p == 2 || frame->millicode_end_reg > 14)
2166 	{
2167 	  int start_call = frame->millicode_start_reg;
2168 	  int end_call = frame->millicode_end_reg;
2169 	  int n_regs = end_call - start_call + 1;
2170 	  int i = 0, r, off = 0;
2171 	  rtx insn;
2172 	  rtx ret_addr = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
2173 
2174 	  if (*first_offset)
2175 	    {
2176 	      /* "reg_size" won't be more than 127.  */
2177 	      gcc_assert (epilogue_p || abs (*first_offset) <= 127);
2178 	      frame_add (base_reg, *first_offset);
2179 	      *first_offset = 0;
2180 	    }
2181 	  insn = gen_rtx_PARALLEL
2182 		  (VOIDmode, rtvec_alloc ((epilogue_p == 2) + n_regs + 1));
2183 	  if (epilogue_p == 2)
2184 	    i += 2;
2185 	  else
2186 	    XVECEXP (insn, 0, n_regs) = gen_rtx_CLOBBER (VOIDmode, ret_addr);
2187 	  for (r = start_call; r <= end_call; r++, off += UNITS_PER_WORD, i++)
2188 	    {
2189 	      rtx reg = gen_rtx_REG (SImode, r);
2190 	      rtx mem
2191 		= gen_frame_mem (SImode, plus_constant (Pmode, base_reg, off));
2192 
2193 	      if (epilogue_p)
2194 		XVECEXP (insn, 0, i) = gen_rtx_SET (VOIDmode, reg, mem);
2195 	      else
2196 		XVECEXP (insn, 0, i) = gen_rtx_SET (VOIDmode, mem, reg);
2197 	      gmask = gmask & ~(1L << r);
2198 	    }
2199 	  if (epilogue_p == 2)
2200 	    sibthunk_insn = insn;
2201 	  else
2202 	    frame_insn (insn);
2203 	  offset += off;
2204 	}
2205 
2206       for (regno = 0; regno <= 31; regno++)
2207 	{
2208 	  if ((gmask & (1L << regno)) != 0)
2209 	    {
2210 	      rtx reg = gen_rtx_REG (SImode, regno);
2211 	      rtx addr, mem;
2212 
2213 	      if (*first_offset)
2214 		{
2215 		  gcc_assert (!offset);
2216 		  addr = plus_constant (Pmode, base_reg, *first_offset);
2217 		  addr = gen_rtx_PRE_MODIFY (Pmode, base_reg, addr);
2218 		  *first_offset = 0;
2219 		}
2220 	      else
2221 		{
2222 		  gcc_assert (SMALL_INT (offset));
2223 		  addr = plus_constant (Pmode, base_reg, offset);
2224 		}
2225 	      mem = gen_frame_mem (SImode, addr);
2226 	      if (epilogue_p)
2227 		frame_move_inc (reg, mem, base_reg, addr);
2228 	      else
2229 		frame_move_inc (mem, reg, base_reg, addr);
2230 	      offset += UNITS_PER_WORD;
2231 	    } /* if */
2232 	} /* for */
2233     }/* if */
2234   if (sibthunk_insn)
2235     {
2236       rtx r12 = gen_rtx_REG (Pmode, 12);
2237 
2238       frame_insn (gen_rtx_SET (VOIDmode, r12, GEN_INT (offset)));
2239       XVECEXP (sibthunk_insn, 0, 0) = ret_rtx;
2240       XVECEXP (sibthunk_insn, 0, 1)
2241 	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2242 		       gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12));
2243       sibthunk_insn = emit_jump_insn (sibthunk_insn);
2244       RTX_FRAME_RELATED_P (sibthunk_insn) = 1;
2245     }
2246 } /* arc_save_restore */
2247 
2248 
2249 int arc_return_address_regs[4]
2250   = {0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM};
2251 
2252 /* Set up the stack and frame pointer (if desired) for the function.  */
2253 
2254 void
2255 arc_expand_prologue (void)
2256 {
2257   int size = get_frame_size ();
2258   unsigned int gmask = cfun->machine->frame_info.gmask;
2259   /*  unsigned int frame_pointer_offset;*/
2260   unsigned int frame_size_to_allocate;
2261   /* (FIXME: The first store will use a PRE_MODIFY; this will usually be r13.
2262      Change the stack layout so that we rather store a high register with the
2263      PRE_MODIFY, thus enabling more short insn generation.)  */
2264   int first_offset = 0;
2265 
2266   size = ARC_STACK_ALIGN (size);
2267 
2268   /* Compute/get total frame size.  */
2269   size = (!cfun->machine->frame_info.initialized
2270 	   ? arc_compute_frame_size (size)
2271 	   : cfun->machine->frame_info.total_size);
2272 
2273   if (flag_stack_usage_info)
2274     current_function_static_stack_size = size;
2275 
2276   /* Keep track of frame size to be allocated.  */
2277   frame_size_to_allocate = size;
2278 
2279   /* These cases shouldn't happen.  Catch them now.  */
2280   gcc_assert (!(size == 0 && gmask));
2281 
2282   /* Allocate space for register arguments if this is a variadic function.  */
2283   if (cfun->machine->frame_info.pretend_size != 0)
2284     {
2285       /* Ensure pretend_size is at most 8 * UNITS_PER_WORD (32 bytes).  */
2286       gcc_assert (cfun->machine->frame_info.pretend_size <= 32);
2287 
2288       frame_stack_add (-(HOST_WIDE_INT)cfun->machine->frame_info.pretend_size);
2289       frame_size_to_allocate -= cfun->machine->frame_info.pretend_size;
2290     }
2291 
2292   /* The home-grown ABI says link register is saved first.  */
2293   if (MUST_SAVE_RETURN_ADDR)
2294     {
2295       rtx ra = gen_rtx_REG (SImode, RETURN_ADDR_REGNUM);
2296       rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
2297 
2298       frame_move_inc (mem, ra, stack_pointer_rtx, 0);
2299       frame_size_to_allocate -= UNITS_PER_WORD;
2300 
2301     } /* MUST_SAVE_RETURN_ADDR */
2302 
2303   /* Save any needed call-saved regs (and call-used if this is an
2304      interrupt handler) for ARCompact ISA.  */
2305   if (cfun->machine->frame_info.reg_size)
2306     {
2307       first_offset = -cfun->machine->frame_info.reg_size;
2308       /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask.  */
2309       arc_save_restore (stack_pointer_rtx, gmask, 0, &first_offset);
2310       frame_size_to_allocate -= cfun->machine->frame_info.reg_size;
2311     }
2312 
2313 
2314   /* Save frame pointer if needed.  */
2315   if (frame_pointer_needed)
2316     {
2317       rtx addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2318 			       GEN_INT (-UNITS_PER_WORD + first_offset));
2319       rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_MODIFY (Pmode,
2320 							  stack_pointer_rtx,
2321 							  addr));
2322       frame_move_inc (mem, frame_pointer_rtx, stack_pointer_rtx, 0);
2323       frame_size_to_allocate -= UNITS_PER_WORD;
2324       first_offset = 0;
2325       frame_move (frame_pointer_rtx, stack_pointer_rtx);
2326     }
2327 
2328   /* ??? We don't handle the case where the saved regs are more than 252
2329      bytes away from sp.  This can be handled by decrementing sp once, saving
2330      the regs, and then decrementing it again.  The epilogue doesn't have this
2331      problem as the `ld' insn takes reg+limm values (though it would be more
2332      efficient to avoid reg+limm).  */
2333 
2334   frame_size_to_allocate -= first_offset;
2335   /* Allocate the stack frame.  */
2336   if (frame_size_to_allocate > 0)
2337     frame_stack_add ((HOST_WIDE_INT) 0 - frame_size_to_allocate);
2338 
2339   /* Setup the gp register, if needed.  */
2340   if (crtl->uses_pic_offset_table)
2341     arc_finalize_pic ();
2342 }
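/* Schematically, a function that saves blink and r13 and needs 16 bytes of
   locals gets a prologue along these lines (a sketch, not the exact
   output):
       st.a blink,[sp,-4]   ; return address, PRE_DEC
       st.a r13,[sp,-4]     ; register save area, PRE_MODIFY
       sub  sp,sp,16        ; allocate the rest of the frame  */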
2343 
2344 /* Do any necessary cleanup after a function to restore stack, frame,
2345    and regs.  */
2346 
2347 void
2348 arc_expand_epilogue (int sibcall_p)
2349 {
2350   int size = get_frame_size ();
2351   enum arc_function_type fn_type = arc_compute_function_type (cfun);
2352 
2353   size = ARC_STACK_ALIGN (size);
2354   size = (!cfun->machine->frame_info.initialized
2355 	   ? arc_compute_frame_size (size)
2356 	   : cfun->machine->frame_info.total_size);
2357 
2358   unsigned int pretend_size = cfun->machine->frame_info.pretend_size;
2359   unsigned int frame_size;
2360   unsigned int size_to_deallocate;
2361   int restored;
2362   int can_trust_sp_p = !cfun->calls_alloca;
2363   int first_offset = 0;
2364   int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0;
2365 
2366   size_to_deallocate = size;
2367 
2368   frame_size = size - (pretend_size +
2369 		       cfun->machine->frame_info.reg_size +
2370 		       cfun->machine->frame_info.extra_size);
2371 
2372   /* ??? There are lots of optimizations that can be done here.
2373      EG: Use fp to restore regs if it's closer.
2374      Maybe in time we'll do them all.  For now, always restore regs from
2375      sp, but don't restore sp if we don't have to.  */
2376 
2377   if (!can_trust_sp_p)
2378     gcc_assert (frame_pointer_needed);
2379 
2380   /* Restore stack pointer to the beginning of saved register area for
2381      ARCompact ISA.  */
2382   if (frame_size)
2383     {
2384       if (frame_pointer_needed)
2385 	frame_move (stack_pointer_rtx, frame_pointer_rtx);
2386       else
2387 	first_offset = frame_size;
2388       size_to_deallocate -= frame_size;
2389     }
2390   else if (!can_trust_sp_p)
2391     frame_stack_add (-frame_size);
2392 
2393 
2394   /* Restore any saved registers.  */
2395   if (frame_pointer_needed)
2396     {
2397       rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
2398 
2399       frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
2400 		      stack_pointer_rtx, 0);
2401       size_to_deallocate -= UNITS_PER_WORD;
2402     }
2403 
2404   /* Restore blink after the millicode thunk calls when optimizing for size.  */
2405   if (millicode_p)
2406     {
2407       int sibthunk_p = (!sibcall_p
2408 			&& fn_type == ARC_FUNCTION_NORMAL
2409 			&& !cfun->machine->frame_info.pretend_size);
2410 
2411       gcc_assert (!(cfun->machine->frame_info.gmask
2412 		    & (FRAME_POINTER_MASK | RETURN_ADDR_MASK)));
2413       arc_save_restore (stack_pointer_rtx,
2414 			cfun->machine->frame_info.gmask,
2415 			1 + sibthunk_p, &first_offset);
2416       if (sibthunk_p)
2417 	goto epilogue_done;
2418     }
2419   /* If we are to restore registers, and first_offset would require
2420      a limm to be encoded in a PRE_MODIFY, yet we can add it with a
2421      fast add to the stack pointer, do this now.  */
2422   if ((!SMALL_INT (first_offset)
2423        && cfun->machine->frame_info.gmask
2424        && ((TARGET_ARC700 && !optimize_size)
2425 	    ? first_offset <= 0x800
2426 	    : satisfies_constraint_C2a (GEN_INT (first_offset))))
2427        /* Also do this if we have both gprs and return
2428 	  address to restore, and they both would need a LIMM.  */
2429        || (MUST_SAVE_RETURN_ADDR
2430 	   && !SMALL_INT ((cfun->machine->frame_info.reg_size + first_offset) >> 2)
2431 	   && cfun->machine->frame_info.gmask))
2432     {
2433       frame_stack_add (first_offset);
2434       first_offset = 0;
2435     }
2436   if (MUST_SAVE_RETURN_ADDR)
2437     {
2438       rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
2439       int ra_offs = cfun->machine->frame_info.reg_size + first_offset;
2440       rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs);
2441 
2442       /* If the load of blink would need a LIMM, but we can add
2443 	 the offset quickly to sp, do the latter.  */
2444       if (!SMALL_INT (ra_offs >> 2)
2445 	  && !cfun->machine->frame_info.gmask
2446 	  && ((TARGET_ARC700 && !optimize_size)
2447 	       ? ra_offs <= 0x800
2448 	       : satisfies_constraint_C2a (GEN_INT (ra_offs))))
2449 	{
2450 	   size_to_deallocate -= ra_offs - first_offset;
2451 	   first_offset = 0;
2452 	   frame_stack_add (ra_offs);
2453 	   ra_offs = 0;
2454 	   addr = stack_pointer_rtx;
2455 	}
2456       /* See if we can combine the load of the return address with the
2457 	 final stack adjustment.
2458 	 We need a separate load if there are still registers to
2459 	 restore.  We also want a separate load if the combined insn
2460 	 would need a limm, but a separate load doesn't.  */
2461       if (ra_offs
2462 	  && !cfun->machine->frame_info.gmask
2463 	  && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2)))
2464 	{
2465 	  addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr);
2466 	  first_offset = 0;
2467 	  size_to_deallocate -= cfun->machine->frame_info.reg_size;
2468 	}
2469       else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD)
2470 	{
2471 	  addr = gen_rtx_POST_INC (Pmode, addr);
2472 	  size_to_deallocate = 0;
2473 	}
2474       frame_move_inc (ra, gen_frame_mem (Pmode, addr), stack_pointer_rtx, addr);
2475     }
2476 
2477   if (!millicode_p)
2478     {
2479       if (cfun->machine->frame_info.reg_size)
2480 	arc_save_restore (stack_pointer_rtx,
2481 			  /* The zeroing of these two bits is unnecessary, but leave this in for clarity.  */
2482 			  cfun->machine->frame_info.gmask
2483 			  & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 1, &first_offset);
2484     }
2485 
2486 
2487   /* The rest of this function does the following:
2488      ARCompact: handle epilogue_delay, restore sp (phase-2), return.  */
2490 
2491   /* Keep track of how much of the stack pointer we've restored.
2492      It makes the following a lot more readable.  */
2493   size_to_deallocate += first_offset;
2494   restored = size - size_to_deallocate;
2495 
2496   if (size > restored)
2497     frame_stack_add (size - restored);
2498   /* Emit the return instruction.  */
2499   if (sibcall_p == FALSE)
2500     emit_jump_insn (gen_simple_return ());
2501  epilogue_done:
2502   if (!TARGET_EPILOGUE_CFI)
2503     {
2504       rtx_insn *insn;
2505 
2506       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2507 	RTX_FRAME_RELATED_P (insn) = 0;
2508     }
2509 }
2510 
2511 /* Return the offset relative to the stack pointer where the return address
2512    is stored, or -1 if it is not stored.  */
2513 
2514 int
2515 arc_return_slot_offset ()
2516 {
2517   struct arc_frame_info *afi = &cfun->machine->frame_info;
2518 
2519   return (afi->save_return_addr
2520 	  ? afi->total_size - afi->pretend_size - afi->extra_size : -1);
2521 }
2522 
2523 /* PIC */
2524 
2525 /* Emit special PIC prologues and epilogues.  */
2526 /* If the function has any GOTOFF relocations, then the GOTBASE
2527    register has to be setup in the prologue
2528    The instruction needed at the function start for setting up the
2529    GOTBASE register is
2530       add rdest, pc,
2531    ----------------------------------------------------------
2532    The rtl to be emitted for this should be:
2533      set (reg basereg)
2534          (plus (reg pc)
2535                (const (unspec (symref _DYNAMIC) 3)))
2536    ----------------------------------------------------------  */
2537 
2538 static void
2539 arc_finalize_pic (void)
2540 {
2541   rtx pat;
2542   rtx baseptr_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
2543 
2544   if (crtl->uses_pic_offset_table == 0)
2545     return;
2546 
2547   gcc_assert (flag_pic != 0);
2548 
2549   pat = gen_rtx_SYMBOL_REF (Pmode, "_DYNAMIC");
2550   pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pat), ARC_UNSPEC_GOT);
2551   pat = gen_rtx_CONST (Pmode, pat);
2552 
2553   pat = gen_rtx_SET (VOIDmode, baseptr_rtx, pat);
2554 
2555   emit_insn (pat);
2556 }
2557 
2558 /* !TARGET_BARREL_SHIFTER support.  */
2559 /* Emit a shift insn to set OP0 to OP1 shifted by OP2; CODE specifies what
2560    kind of shift.  */
2561 
2562 void
2563 emit_shift (enum rtx_code code, rtx op0, rtx op1, rtx op2)
2564 {
2565   rtx shift = gen_rtx_fmt_ee (code, SImode, op1, op2);
2566   rtx pat
2567     = ((shift4_operator (shift, SImode) ?  gen_shift_si3 : gen_shift_si3_loop)
2568 	(op0, op1, op2, shift));
2569   emit_insn (pat);
2570 }
2571 
2572 /* Output the assembler code for doing a shift.
2573    We go to a bit of trouble to generate efficient code as the ARC601 only has
2574    single bit shifts.  This is taken from the h8300 port.  We only have one
2575    mode of shifting and can't access individual bytes like the h8300 can, so
2576    this is greatly simplified (at the expense of not generating hyper-
2577    efficient code).
2578 
2579    This function is not used if the variable shift insns are present.  */
2580 
2581 /* FIXME:  This probably can be done using a define_split in arc.md.
2582    Alternately, generate rtx rather than output instructions.  */
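/* For example, a constant ASHIFT by 6 with a usable scratch register %4 is
   emitted as
       mov  %4,0
       add3 %0,%4,%1  ; %0 = %1 << 3, as add3 a,b,c computes b + (c << 3)
       add3 %0,%4,%0  ; %0 <<= 3
   rather than as a six-iteration loop.  */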
2583 
2584 const char *
2585 output_shift (rtx *operands)
2586 {
2587   /*  static int loopend_lab;*/
2588   rtx shift = operands[3];
2589   machine_mode mode = GET_MODE (shift);
2590   enum rtx_code code = GET_CODE (shift);
2591   const char *shift_one;
2592 
2593   gcc_assert (mode == SImode);
2594 
2595   switch (code)
2596     {
2597     case ASHIFT:   shift_one = "add %0,%1,%1"; break;
2598     case ASHIFTRT: shift_one = "asr %0,%1"; break;
2599     case LSHIFTRT: shift_one = "lsr %0,%1"; break;
2600     default:       gcc_unreachable ();
2601     }
2602 
2603   if (GET_CODE (operands[2]) != CONST_INT)
2604     {
2605       output_asm_insn ("and.f lp_count,%2, 0x1f", operands);
2606       goto shiftloop;
2607     }
2608   else
2609     {
2610       int n;
2611 
2612       n = INTVAL (operands[2]);
2613 
2614       /* Only consider the lower 5 bits of the shift count.  */
2615       n = n & 0x1f;
2616 
2617       /* First see if we can do them inline.  */
2618       /* ??? We could get better scheduling & shorter code (using short insns)
2619 	 by using splitters.  Alas, that'd be even more verbose.  */
2620       if (code == ASHIFT && n <= 9 && n > 2
2621 	  && dest_reg_operand (operands[4], SImode))
2622 	{
2623 	  output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands);
2624 	  for (n -= 3; n >= 3; n -= 3)
2625 	    output_asm_insn ("add3 %0,%4,%0", operands);
2626 	  if (n == 2)
2627 	    output_asm_insn ("add2 %0,%4,%0", operands);
2628 	  else if (n)
2629 	    output_asm_insn ("add %0,%0,%0", operands);
2630 	}
2631       else if (n <= 4)
2632 	{
2633 	  while (--n >= 0)
2634 	    {
2635 	      output_asm_insn (shift_one, operands);
2636 	      operands[1] = operands[0];
2637 	    }
2638 	}
2639       /* See if we can use a rotate/and.  */
2640       else if (n == BITS_PER_WORD - 1)
2641 	{
2642 	  switch (code)
2643 	    {
2644 	    case ASHIFT :
2645 	      output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands);
2646 	      break;
2647 	    case ASHIFTRT :
2648 	      /* The ARC doesn't have a rol insn.  Use something else.  */
2649 	      output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands);
2650 	      break;
2651 	    case LSHIFTRT :
2652 	      /* The ARC doesn't have a rol insn.  Use something else.  */
2653 	      output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands);
2654 	      break;
2655 	    default:
2656 	      break;
2657 	    }
2658 	}
2659       else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode))
2660 	{
2661 	  switch (code)
2662 	    {
2663 	    case ASHIFT :
2664 	      output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands);
2665 	      break;
2666 	    case ASHIFTRT :
2667 #if 1 /* Need some scheduling comparisons.  */
2668 	      output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t"
2669 			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
2670 #else
2671 	      output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t"
2672 			       "sbc.f %0,%0,%4\n\trlc %0,%0", operands);
2673 #endif
2674 	      break;
2675 	    case LSHIFTRT :
2676 #if 1
2677 	      output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t"
2678 			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
2679 #else
2680 	      output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t"
2681 			       "and %0,%0,1\n\trlc %0,%0", operands);
2682 #endif
2683 	      break;
2684 	    default:
2685 	      break;
2686 	    }
2687 	}
2688       else if (n == BITS_PER_WORD - 3 && code == ASHIFT)
2689 	output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0",
2690 			 operands);
2691       /* Must loop.  */
2692       else
2693 	{
2694 	  operands[2] = GEN_INT (n);
2695 	  output_asm_insn ("mov.f lp_count, %2", operands);
2696 
2697 	shiftloop:
2698 	    {
2699 	      output_asm_insn ("lpnz\t2f", operands);
2700 	      output_asm_insn (shift_one, operands);
2701 	      output_asm_insn ("nop", operands);
2702 	      fprintf (asm_out_file, "2:\t%s end single insn loop\n",
2703 		       ASM_COMMENT_START);
2704 	    }
2705 	}
2706     }
2707 
2708   return "";
2709 }
2710 
2711 /* Nested function support.  */
2712 
2713 /* Directly store VALUE into memory object BLOCK at OFFSET.  */
2714 
2715 static void
2716 emit_store_direct (rtx block, int offset, int value)
2717 {
2718   emit_insn (gen_store_direct (adjust_address (block, SImode, offset),
2719 			       force_reg (SImode,
2720 					  gen_int_mode (value, SImode))));
2721 }
2722 
2723 /* Emit RTL insns to initialize the variable parts of a trampoline.
2724    FNADDR is an RTX for the address of the function's pure code.
2725    CXT is an RTX for the static chain value for the function.  */
2726 /* With potentially multiple shared objects loaded, and multiple stacks
2727    present for multiple threads where trampolines might reside, a simple
2728    range check will likely not suffice for the profiler to tell if a callee
2729    is a trampoline.  We get a speedier check by making the trampoline start at
2730    an address that is not 4-byte aligned.
2731    A trampoline looks like this:
2732 
2733    nop_s	     0x78e0
2734 entry:
2735    ld_s r12,[pcl,12] 0xd403
2736    ld   r11,[pcl,12] 0x170c 700b
2737    j_s [r12]         0x7c00
2738    nop_s	     0x78e0
2739 
2740    The fastest trampoline to execute for trampolines within +-8KB of CTX
2741    would be:
2742    add2 r11,pcl,s12
2743    j [limm]           0x20200f80 limm
2744    and that would also be faster to write to the stack by computing the offset
2745    from CTX to TRAMP at compile time.  However, it would really be better to
2746    get rid of the high cost of cache invalidation when generating trampolines,
2747    which requires that the code part of trampolines stays constant, and
2748    additionally either
2749    - making sure that no executable code but trampolines is on the stack,
2750      no icache entries linger for the area of the stack from before the
2751      stack was allocated, and allocating trampolines in trampoline-only
2752      cache lines
2753   or
2754    - allocate trampolines from a special pool of pre-allocated trampolines.  */
2755 
2756 static void
2757 arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
2758 {
2759   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
2760 
2761   emit_store_direct (tramp, 0, TARGET_BIG_ENDIAN ? 0x78e0d403 : 0xd40378e0);
2762   emit_store_direct (tramp, 4, TARGET_BIG_ENDIAN ? 0x170c700b : 0x700b170c);
2763   emit_store_direct (tramp, 8, TARGET_BIG_ENDIAN ? 0x7c0078e0 : 0x78e07c00);
2764   emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr);
2765   emit_move_insn (adjust_address (tramp, SImode, 16), cxt);
2766   emit_insn (gen_flush_icache (adjust_address (tramp, SImode, 0)));
2767 }
2768 
2769 /* Allow the profiler to easily distinguish trampolines from normal
2770   functions.  */
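/* The code proper starts two bytes into the trampoline, after the leading
   nop_s, so the adjusted address is where execution actually begins - and,
   per the layout comment above, it is not 4-byte aligned.  */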
2771 
2772 static rtx
2773 arc_trampoline_adjust_address (rtx addr)
2774 {
2775   return plus_constant (Pmode, addr, 2);
2776 }
2777 
2778 /* This is set briefly to 1 when we output a ".as" address modifier, and then
2779    reset when we output the scaled address.  */
2780 static int output_scaled = 0;
2781 
2782 /* Print operand X (an rtx) in assembler syntax to file FILE.
2783    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
2784    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
2785 /* In final.c:output_asm_insn:
2786     'l' : label
2787     'a' : address
2788     'c' : constant address if CONSTANT_ADDRESS_P
2789     'n' : negative
2790    Here:
2791     'Z': log2(x+1)-1
2792     'z': log2
2793     'M': log2(~x)
2794     '#': condbranch delay slot suffix
2795     '*': jump delay slot suffix
2796     '?' : nonjump-insn suffix for conditional execution or short instruction
2797     '!' : jump / call suffix for conditional execution or short instruction
2798     '`': fold constant inside unary operator, re-recognize, and emit.
2799     'd'
2800     'D'
2801     'R': Second word
2802     'S'
2803     'B': Branch comparison operand - suppress sda reference
2804     'H': Most significant word
2805     'L': Least significant word
2806     'A': ASCII decimal representation of floating point value
2807     'U': Load/store update or scaling indicator
2808     'V': cache bypass indicator for volatile
2809     'P'
2810     'F'
2811     '^'
2812     'O': Operator
2813     'o': original symbol - no @ prepending.  */
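/* For example, "%U0" on a post-modify memory operand prints ".ab" and on a
   pre-modify one prints ".a"; "%V0" prints ".di" when the access is
   volatile and the cache is not set up to handle volatile accesses.  */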
2814 
2815 void
2816 arc_print_operand (FILE *file, rtx x, int code)
2817 {
2818   switch (code)
2819     {
2820     case 'Z':
2821       if (GET_CODE (x) == CONST_INT)
2822 	fprintf (file, "%d", exact_log2 (INTVAL (x) + 1) - 1);
2823       else
2824 	output_operand_lossage ("invalid operand to %%Z code");
2825 
2826       return;
2827 
2828     case 'z':
2829       if (GET_CODE (x) == CONST_INT)
2830 	fprintf (file, "%d", exact_log2 (INTVAL (x)));
2831       else
2832 	output_operand_lossage ("invalid operand to %%z code");
2833 
2834       return;
2835 
2836     case 'M':
2837       if (GET_CODE (x) == CONST_INT)
2838 	fprintf (file, "%d", exact_log2 (~INTVAL (x)));
2839       else
2840 	output_operand_lossage ("invalid operand to %%M code");
2841 
2842       return;
2843 
2844     case '#' :
2845       /* Conditional branches depending on condition codes.
2846 	 Note that this is only for branches that were known to depend on
2847 	 condition codes before delay slot scheduling;
2848 	 out-of-range brcc / bbit expansions should use '*'.
2849 	 This distinction is important because of the different
2850 	 allowable delay slot insns and the output of the delay suffix
2851 	 for TARGET_AT_DBR_COND_EXEC.  */
2852     case '*' :
2853       /* Unconditional branches / branches not depending on condition codes.
2854 	 This could also be a CALL_INSN.
2855 	 Output the appropriate delay slot suffix.  */
2856       if (final_sequence && final_sequence->len () != 1)
2857 	{
2858 	  rtx_insn *jump = final_sequence->insn (0);
2859 	  rtx_insn *delay = final_sequence->insn (1);
2860 
2861 	  /* For TARGET_PAD_RETURN we might have grabbed the delay insn.  */
2862 	  if (delay->deleted ())
2863 	    return;
2864 	  if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
2865 	    fputs (INSN_FROM_TARGET_P (delay) ? ".d"
2866 		   : TARGET_AT_DBR_CONDEXEC && code == '#' ? ".d"
2867 		   : get_attr_type (jump) == TYPE_RETURN && code == '#' ? ""
2868 		   : ".nd",
2869 		   file);
2870 	  else
2871 	    fputs (".d", file);
2872 	}
2873       return;
2874     case '?' : /* with leading "." */
2875     case '!' : /* without leading "." */
2876       /* This insn can be conditionally executed.  See if the ccfsm machinery
2877 	 says it should be conditionalized.
2878 	 If it shouldn't, we'll check the compact attribute if this insn
2879 	 has a short variant, which may be used depending on code size and
2880 	 alignment considerations.  */
2881       if (current_insn_predicate)
2882 	arc_ccfsm_current.cc
2883 	  = get_arc_condition_code (current_insn_predicate);
2884       if (ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current))
2885 	{
2886 	  /* Is this insn in a delay slot sequence?  */
2887 	  if (!final_sequence || XVECLEN (final_sequence, 0) < 2
2888 	      || current_insn_predicate
2889 	      || CALL_P (final_sequence->insn (0))
2890 	      || simplejump_p (final_sequence->insn (0)))
2891 	    {
2892 	      /* This insn isn't in a delay slot sequence, or conditionalized
2893 		 independently of its position in a delay slot.  */
2894 	      fprintf (file, "%s%s",
2895 		       code == '?' ? "." : "",
2896 		       arc_condition_codes[arc_ccfsm_current.cc]);
2897 	      /* If this is a jump, there are still short variants.  However,
2898 		 only beq_s / bne_s have the same offset range as b_s,
2899 		 and the only short conditional returns are jeq_s and jne_s.  */
2900 	      if (code == '!'
2901 		  && (arc_ccfsm_current.cc == ARC_CC_EQ
2902 		      || arc_ccfsm_current.cc == ARC_CC_NE
2903 		      || 0 /* FIXME: check if branch in 7 bit range.  */))
2904 		output_short_suffix (file);
2905 	    }
2906 	  else if (code == '!') /* Jump with delay slot.  */
2907 	    fputs (arc_condition_codes[arc_ccfsm_current.cc], file);
2908 	  else /* An instruction in a delay slot of a jump or call.  */
2909 	    {
2910 	      rtx jump = XVECEXP (final_sequence, 0, 0);
2911 	      rtx insn = XVECEXP (final_sequence, 0, 1);
2912 
2913 	      /* If the insn is annulled and is from the target path, we need
2914 		 to invert the condition test.  */
2915 	      if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
2916 		{
2917 		  if (INSN_FROM_TARGET_P (insn))
2918 		    fprintf (file, "%s%s",
2919 			     code == '?' ? "." : "",
2920 			     arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current.cc)]);
2921 		  else
2922 		    fprintf (file, "%s%s",
2923 			     code == '?' ? "." : "",
2924 			     arc_condition_codes[arc_ccfsm_current.cc]);
2925 		  if (arc_ccfsm_current.state == 5)
2926 		    arc_ccfsm_current.state = 0;
2927 		}
2928 	      else
2929 		/* This insn is executed for either path, so don't
2930 		   conditionalize it at all.  */
2931 		output_short_suffix (file);
2932 
2933 	    }
2934 	}
2935       else
2936 	output_short_suffix (file);
2937       return;
2938     case '`':
2939       /* FIXME: fold constant inside unary operator, re-recognize, and emit.  */
2940       gcc_unreachable ();
2941     case 'd' :
2942       fputs (arc_condition_codes[get_arc_condition_code (x)], file);
2943       return;
2944     case 'D' :
2945       fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE
2946 				 (get_arc_condition_code (x))],
2947 	     file);
2948       return;
2949     case 'R' :
2950       /* Write second word of DImode or DFmode reference,
2951 	 register or memory.  */
2952       if (GET_CODE (x) == REG)
2953 	fputs (reg_names[REGNO (x)+1], file);
2954       else if (GET_CODE (x) == MEM)
2955 	{
2956 	  fputc ('[', file);
2957 
2958 	  /* Handle possible auto-increment.  For PRE_INC / PRE_DEC /
2959 	    PRE_MODIFY, we will have handled the first word already;
2960 	    For POST_INC / POST_DEC / POST_MODIFY, the access to the
2961 	    first word will be done later.  In either case, the access
2962 	    to the first word will do the modify, and we only have
2963 	    to add an offset of four here.  */
2964 	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
2965 	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
2966 	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY
2967 	      || GET_CODE (XEXP (x, 0)) == POST_INC
2968 	      || GET_CODE (XEXP (x, 0)) == POST_DEC
2969 	      || GET_CODE (XEXP (x, 0)) == POST_MODIFY)
2970 	    output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 4));
2971 	  else if (output_scaled)
2972 	    {
2973 	      rtx addr = XEXP (x, 0);
2974 	      int size = GET_MODE_SIZE (GET_MODE (x));
2975 
2976 	      output_address (plus_constant (Pmode, XEXP (addr, 0),
2977 					     ((INTVAL (XEXP (addr, 1)) + 4)
2978 					      >> (size == 2 ? 1 : 2))));
2979 	      output_scaled = 0;
2980 	    }
2981 	  else
2982 	    output_address (plus_constant (Pmode, XEXP (x, 0), 4));
2983 	  fputc (']', file);
2984 	}
2985       else
2986 	output_operand_lossage ("invalid operand to %%R code");
2987       return;
2988     case 'S' :
2989 	/* FIXME: remove %S option.  */
2990 	break;
2991     case 'B' /* Branch or other LIMM ref - must not use sda references.  */ :
2992       if (CONSTANT_P (x))
2993 	{
2994 	  output_addr_const (file, x);
2995 	  return;
2996 	}
2997       break;
2998     case 'H' :
2999     case 'L' :
3000       if (GET_CODE (x) == REG)
3001 	{
3002 	  /* L = least significant word, H = most significant word.  */
3003 	  if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L'))
3004 	    fputs (reg_names[REGNO (x)], file);
3005 	  else
3006 	    fputs (reg_names[REGNO (x)+1], file);
3007 	}
3008       else if (GET_CODE (x) == CONST_INT
3009 	       || GET_CODE (x) == CONST_DOUBLE)
3010 	{
3011 	  rtx first, second;
3012 
3013 	  split_double (x, &first, &second);
3014 
3015 	  if ((WORDS_BIG_ENDIAN) == 0)
3016 	      fprintf (file, "0x%08" PRIx64,
3017 		       code == 'L' ? INTVAL (first) : INTVAL (second));
3018 	  else
3019 	      fprintf (file, "0x%08" PRIx64,
3020 		       code == 'L' ? INTVAL (second) : INTVAL (first));
3023 	}
3024       else
3025 	output_operand_lossage ("invalid operand to %%H/%%L code");
3026       return;
3027     case 'A' :
3028       {
3029 	char str[30];
3030 
3031 	gcc_assert (GET_CODE (x) == CONST_DOUBLE
3032 		    && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT);
3033 
3034 	real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1);
3035 	fprintf (file, "%s", str);
3036 	return;
3037       }
3038     case 'U' :
3039       /* Output a load/store with update indicator if appropriate.  */
3040       if (GET_CODE (x) == MEM)
3041 	{
3042 	  rtx addr = XEXP (x, 0);
3043 	  switch (GET_CODE (addr))
3044 	    {
3045 	    case PRE_INC: case PRE_DEC: case PRE_MODIFY:
3046 	      fputs (".a", file); break;
3047 	    case POST_INC: case POST_DEC: case POST_MODIFY:
3048 	      fputs (".ab", file); break;
3049 	    case PLUS:
3050 	      /* Are we using a scaled index?  */
3051 	      if (GET_CODE (XEXP (addr, 0)) == MULT)
3052 		fputs (".as", file);
3053 	      /* Can we use a scaled offset?  */
3054 	      else if (CONST_INT_P (XEXP (addr, 1))
3055 		       && GET_MODE_SIZE (GET_MODE (x)) > 1
3056 		       && (!(INTVAL (XEXP (addr, 1))
3057 			     & (GET_MODE_SIZE (GET_MODE (x)) - 1) & 3))
3058 		       /* Does it make a difference?  */
3059 		       && !SMALL_INT_RANGE (INTVAL (XEXP (addr, 1)),
3060 					   GET_MODE_SIZE (GET_MODE (x)) - 2, 0))
3061 		{
3062 		  fputs (".as", file);
3063 		  output_scaled = 1;
3064 		}
3065 	      break;
3066 	    case REG:
3067 	      break;
3068 	    default:
3069 	      gcc_assert (CONSTANT_P (addr)); break;
3070 	    }
3071 	}
3072       else
3073 	output_operand_lossage ("invalid operand to %%U code");
3074       return;
3075     case 'V' :
3076       /* Output cache bypass indicator for a load/store insn.  Volatile memory
3077 	 refs are defined to use the cache bypass mechanism.  */
3078       if (GET_CODE (x) == MEM)
3079 	{
3080 	  if (MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET )
3081 	    fputs (".di", file);
3082 	}
3083       else
3084 	output_operand_lossage ("invalid operand to %%V code");
3085       return;
3086       /* plt code.  */
3087     case 'P':
3088     case 0 :
3089       /* Do nothing special.  */
3090       break;
3091     case 'F':
3092       fputs (reg_names[REGNO (x)]+1, file);
3093       return;
3094     case '^':
3095 	/* This punctuation character is needed because label references are
3096 	printed in the output template using %l. This is a front end
3097 	character, and when we want to emit a '@' before it, we have to use
3098 	this '^'.  */
3099 
3100 	fputc ('@', file);
3101 	return;
3102     case 'O':
3103       /* Output an operator.  */
3104       switch (GET_CODE (x))
3105 	{
3106 	case PLUS:	fputs ("add", file); return;
3107 	case SS_PLUS:	fputs ("adds", file); return;
3108 	case AND:	fputs ("and", file); return;
3109 	case IOR:	fputs ("or", file); return;
3110 	case XOR:	fputs ("xor", file); return;
3111 	case MINUS:	fputs ("sub", file); return;
3112 	case SS_MINUS:	fputs ("subs", file); return;
3113 	case ASHIFT:	fputs ("asl", file); return;
3114 	case ASHIFTRT:	fputs ("asr", file); return;
3115 	case LSHIFTRT:	fputs ("lsr", file); return;
3116 	case ROTATERT:	fputs ("ror", file); return;
3117 	case MULT:	fputs ("mpy", file); return;
3118 	case ABS:	fputs ("abs", file); return; /* Unconditional.  */
3119 	case NEG:	fputs ("neg", file); return;
3120 	case SS_NEG:	fputs ("negs", file); return;
3121 	case NOT:	fputs ("not", file); return; /* Unconditional.  */
3122 	case ZERO_EXTEND:
3123 	  fputs ("ext", file); /* bmsk allows predication.  */
3124 	  goto size_suffix;
3125 	case SIGN_EXTEND: /* Unconditional.  */
3126 	  fputs ("sex", file);
3127 	size_suffix:
3128 	  switch (GET_MODE (XEXP (x, 0)))
3129 	    {
3130 	    case QImode: fputs ("b", file); return;
3131 	    case HImode: fputs ("w", file); return;
3132 	    default: break;
3133 	    }
3134 	  break;
3135 	case SS_TRUNCATE:
3136 	  if (GET_MODE (x) != HImode)
3137 	    break;
3138 	  fputs ("sat16", file);
	  return;  /* Don't fall into the lossage call below.  */
3139 	default: break;
3140 	}
3141       output_operand_lossage ("invalid operand to %%O code"); return;
3142     case 'o':
3143       if (GET_CODE (x) == SYMBOL_REF)
3144 	{
3145 	  assemble_name (file, XSTR (x, 0));
3146 	  return;
3147 	}
3148       break;
3149     case '&':
3150       if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
3151 	fprintf (file, "; unalign: %d", cfun->machine->unalign);
3152       return;
3153     default :
3154       /* Unknown flag.  */
3155       output_operand_lossage ("invalid operand output code");
3156     }
3157 
3158   switch (GET_CODE (x))
3159     {
3160     case REG :
3161       fputs (reg_names[REGNO (x)], file);
3162       break;
3163     case MEM :
3164       {
3165 	rtx addr = XEXP (x, 0);
3166 	int size = GET_MODE_SIZE (GET_MODE (x));
3167 
3168 	fputc ('[', file);
3169 
3170 	switch (GET_CODE (addr))
3171 	  {
3172 	  case PRE_INC: case POST_INC:
3173 	    output_address (plus_constant (Pmode, XEXP (addr, 0), size)); break;
3174 	  case PRE_DEC: case POST_DEC:
3175 	    output_address (plus_constant (Pmode, XEXP (addr, 0), -size));
3176 	    break;
3177 	  case PRE_MODIFY: case POST_MODIFY:
3178 	    output_address (XEXP (addr, 1)); break;
3179 	  case PLUS:
3180 	    if (output_scaled)
3181 	      {
3182 		output_address (plus_constant (Pmode, XEXP (addr, 0),
3183 					       (INTVAL (XEXP (addr, 1))
3184 						>> (size == 2 ? 1 : 2))));
3185 		output_scaled = 0;
3186 	      }
3187 	    else
3188 	      output_address (addr);
3189 	    break;
3190 	  default:
3191 	    if (flag_pic && CONSTANT_ADDRESS_P (addr))
3192 	      arc_output_pic_addr_const (file, addr, code);
3193 	    else
3194 	      output_address (addr);
3195 	    break;
3196 	  }
3197 	fputc (']', file);
3198 	break;
3199       }
3200     case CONST_DOUBLE :
3201       /* We handle SFmode constants here as output_addr_const doesn't.  */
3202       if (GET_MODE (x) == SFmode)
3203 	{
3204 	  REAL_VALUE_TYPE d;
3205 	  long l;
3206 
3207 	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
3208 	  REAL_VALUE_TO_TARGET_SINGLE (d, l);
3209 	  fprintf (file, "0x%08lx", l);
3210 	  break;
3211 	}
3212       /* Fall through.  Let output_addr_const deal with it.  */
3213     default :
3214       if (flag_pic)
3215 	arc_output_pic_addr_const (file, x, code);
3216       else
3217 	{
3218 	  /* FIXME: Dirty way to handle @var@sda+const. Shd be handled
3219 	     with asm_output_symbol_ref */
3220 	  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3221 	    {
3222 	      x = XEXP (x, 0);
3223 	      output_addr_const (file, XEXP (x, 0));
3224 	      if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF && SYMBOL_REF_SMALL_P (XEXP (x, 0)))
3225 		fprintf (file, "@sda");
3226 
3227 	      if (GET_CODE (XEXP (x, 1)) != CONST_INT
3228 		  || INTVAL (XEXP (x, 1)) >= 0)
3229 		fprintf (file, "+");
3230 	      output_addr_const (file, XEXP (x, 1));
3231 	    }
3232 	  else
3233 	    output_addr_const (file, x);
3234 	}
3235       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x))
3236 	fprintf (file, "@sda");
3237       break;
3238     }
3239 }
3240 
3241 /* Print a memory address as an operand to reference that memory location.  */
3242 
3243 void
3244 arc_print_operand_address (FILE *file , rtx addr)
3245 {
3246   register rtx base, index = 0;
3247 
3248   switch (GET_CODE (addr))
3249     {
3250     case REG :
3251       fputs (reg_names[REGNO (addr)], file);
3252       break;
3253     case SYMBOL_REF :
3254       output_addr_const (file, addr);
3255       if (SYMBOL_REF_SMALL_P (addr))
3256 	fprintf (file, "@sda");
3257       break;
3258     case PLUS :
3259       if (GET_CODE (XEXP (addr, 0)) == MULT)
3260 	index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1);
3261       else if (CONST_INT_P (XEXP (addr, 0)))
3262 	index = XEXP (addr, 0), base = XEXP (addr, 1);
3263       else
3264 	base = XEXP (addr, 0), index = XEXP (addr, 1);
3265 
3266       gcc_assert (OBJECT_P (base));
3267       arc_print_operand_address (file, base);
3268       if (CONSTANT_P (base) && CONST_INT_P (index))
3269 	fputc ('+', file);
3270       else
3271 	fputc (',', file);
3272       gcc_assert (OBJECT_P (index));
3273       arc_print_operand_address (file, index);
3274       break;
3275     case CONST:
3276       {
3277 	rtx c = XEXP (addr, 0);
3278 
3279 	gcc_assert (GET_CODE (XEXP (c, 0)) == SYMBOL_REF);
3280 	gcc_assert (GET_CODE (XEXP (c, 1)) == CONST_INT);
3281 
3282 	output_address (XEXP (addr, 0));
3283 
3284 	break;
3285       }
3286     case PRE_INC :
3287     case PRE_DEC :
3288       /* We shouldn't get here as we've lost the mode of the memory object
3289 	 (which says how much to inc/dec by).  */
3290       gcc_unreachable ();
3291       break;
3292     default :
3293       if (flag_pic)
3294 	arc_output_pic_addr_const (file, addr, 0);
3295       else
3296 	output_addr_const (file, addr);
3297       break;
3298     }
3299 }
3300 
3301 /* Called via walk_stores.  DATA points to a hash table we can use to
3302    establish a unique SYMBOL_REF for each counter, which corresponds to
3303    a caller-callee pair.
3304    X is a store which we want to examine for an UNSPEC_PROF, which
3305    would be an address loaded into a register, or directly used in a MEM.
3306    If we find an UNSPEC_PROF and encounter its counter for the first time,
3307    write out a description and a data allocation for a 32 bit counter.
3308    Also, fill in the appropriate symbol_ref into each UNSPEC_PROF instance.  */
3309 
3310 static void
3311 write_profile_sections (rtx dest ATTRIBUTE_UNUSED, rtx x, void *data)
3312 {
3313   rtx *srcp, src;
3314   htab_t htab = (htab_t) data;
3315   rtx *slot;
3316 
3317   if (GET_CODE (x) != SET)
3318     return;
3319   srcp = &SET_SRC (x);
3320   if (MEM_P (*srcp))
3321     srcp = &XEXP (*srcp, 0);
3322   else if (MEM_P (SET_DEST (x)))
3323     srcp = &XEXP (SET_DEST (x), 0);
3324   src = *srcp;
3325   if (GET_CODE (src) != CONST)
3326     return;
3327   src = XEXP (src, 0);
3328   if (GET_CODE (src) != UNSPEC || XINT (src, 1) != UNSPEC_PROF)
3329     return;
3330 
3331   gcc_assert (XVECLEN (src, 0) == 3);
3332   if (!htab_elements (htab))
3333     {
3334       output_asm_insn (".section .__arc_profile_desc, \"a\"\n"
3335 		       "\t.long %0 + 1\n",
3336 		       &XVECEXP (src, 0, 0));
3337     }
3338   slot = (rtx *) htab_find_slot (htab, src, INSERT);
3339   if (*slot == HTAB_EMPTY_ENTRY)
3340     {
3341       static int count_nr;
3342       char buf[24];
3343       rtx count;
3344 
3345       *slot = src;
3346       sprintf (buf, "__prof_count%d", count_nr++);
3347       count = gen_rtx_SYMBOL_REF (Pmode, xstrdup (buf));
3348       XVECEXP (src, 0, 2) = count;
3349       output_asm_insn (".section\t.__arc_profile_desc, \"a\"\n"
3350 		       "\t.long\t%1\n"
3351 		       "\t.section\t.__arc_profile_counters, \"aw\"\n"
3352 		       "\t.type\t%o2, @object\n"
3353 		       "\t.size\t%o2, 4\n"
3354 		       "%o2:\t.zero 4",
3355 		       &XVECEXP (src, 0, 0));
3356       *srcp = count;
3357     }
3358   else
3359     *srcp = XVECEXP (*slot, 0, 2);
3360 }
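
/* Sketch of the assembly the templates above emit for one new counter
   (the name __prof_count0 comes from the sprintf above; <callee> stands
   for operand %1 of the UNSPEC_PROF):

	.section	.__arc_profile_desc, "a"
	.long	<callee>
	.section	.__arc_profile_counters, "aw"
	.type	__prof_count0, @object
	.size	__prof_count0, 4
   __prof_count0:	.zero 4  */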
3361 
3362 /* Hash function for UNSPEC_PROF htab.  Use both the caller's name and
3363    the callee's name (if known).  */
3364 
3365 static hashval_t
3366 unspec_prof_hash (const void *x)
3367 {
3368   const_rtx u = (const_rtx) x;
3369   const_rtx s1 = XVECEXP (u, 0, 1);
3370 
3371   return (htab_hash_string (XSTR (XVECEXP (u, 0, 0), 0))
3372 	  ^ (s1->code == SYMBOL_REF ? htab_hash_string (XSTR (s1, 0)) : 0));
3373 }
3374 
3375 /* Equality function for UNSPEC_PROF htab.  Two pieces of UNSPEC_PROF rtl
3376    shall refer to the same counter if both caller name and callee rtl
3377    are identical.  */
3378 
3379 static int
3380 unspec_prof_htab_eq (const void *x, const void *y)
3381 {
3382   const_rtx u0 = (const_rtx) x;
3383   const_rtx u1 = (const_rtx) y;
3384   const_rtx s01 = XVECEXP (u0, 0, 1);
3385   const_rtx s11 = XVECEXP (u1, 0, 1);
3386 
3387   return (!strcmp (XSTR (XVECEXP (u0, 0, 0), 0),
3388 		   XSTR (XVECEXP (u1, 0, 0), 0))
3389 	  && rtx_equal_p (s01, s11));
3390 }
3391 
3392 /* Conditional execution support.
3393 
3394    This is based on the ARM port but for now is much simpler.
3395 
3396    A finite state machine takes care of noticing whether or not instructions
3397    can be conditionally executed, and thus decrease execution time and code
3398    size by deleting branch instructions.  The fsm is controlled by
3399    arc_ccfsm_advance (called by arc_final_prescan_insn), and controls the
3400    actions of PRINT_OPERAND.  The patterns in the .md file for the branch
3401    insns also have a hand in this.  */
3402 /* The way we leave dealing with non-annulled or annul-false delay slot
3403    insns to the consumer is awkward.  */
3404 
3405 /* The states of the fsm controlling condition codes are:
3406    0: normal, do nothing special
3407    1: don't output this insn
3408    2: don't output this insn
3409    3: make insns conditional
3410    4: make insns conditional
3411    5: make insn conditional (only for outputting annulled delay slot insns)
3412 
3413    special value for cfun->machine->uid_ccfsm_state:
3414    6: return with but one insn before it since function start / call
3415 
3416    State transitions (state->state by whom, under what condition):
3417    0 -> 1 arc_ccfsm_advance, if insn is a conditional branch skipping over
3418           some instructions.
3419    0 -> 2 arc_ccfsm_advance, if insn is a conditional branch followed
3420           by zero or more non-jump insns and an unconditional branch with
3421 	  the same target label as the condbranch.
3422    1 -> 3 branch patterns, after having not output the conditional branch
3423    2 -> 4 branch patterns, after having not output the conditional branch
3424    0 -> 5 branch patterns, for annulled delay slot insn.
3425    3 -> 0 ASM_OUTPUT_INTERNAL_LABEL, if the `target' label is reached
3426           (the target label has CODE_LABEL_NUMBER equal to
3427 	  arc_ccfsm_target_label).
3428    4 -> 0 arc_ccfsm_advance, if `target' unconditional branch is reached
3429    3 -> 1 arc_ccfsm_advance, finding an 'else' jump skipping over some insns.
3430    5 -> 0 when outputting the delay slot insn
3431 
3432    If the jump clobbers the conditions then we use states 2 and 4.
3433 
3434    A similar thing can be done with conditional return insns.
3435 
3436    We also handle separating branches from sets of the condition code.
3437    This is done here because knowledge of the ccfsm state is required,
3438    we may not be outputting the branch.  */
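
/* By way of example (a sketch, not actual compiler output), the fsm
   allows a sequence such as

	cmp	r0,0
	bne	.L1
	add	r1,r1,1
   .L1:

   to be emitted with the branch deleted and the skipped insn
   predicated:

	cmp	r0,0
	add.eq	r1,r1,1  */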
3439 
3440 /* arc_final_prescan_insn calls arc_ccfsm_advance to adjust arc_ccfsm_current,
3441    before letting final output INSN.  */
3442 
3443 static void
3444 arc_ccfsm_advance (rtx_insn *insn, struct arc_ccfsm *state)
3445 {
3446   /* BODY will hold the body of INSN.  */
3447   register rtx body;
3448 
3449   /* This will be 1 if trying to repeat the trick (i.e., do the `else' part of
3450      an if/then/else), and things need to be reversed.  */
3451   int reverse = 0;
3452 
3453   /* If we start with a return insn, we only succeed if we find another one.  */
3454   int seeking_return = 0;
3455 
3456   /* START_INSN will hold the insn from where we start looking.  This is the
3457      first insn after the following code_label if REVERSE is true.  */
3458   rtx_insn *start_insn = insn;
3459 
3460   /* Type of the jump_insn. Brcc insns don't affect ccfsm changes,
3461      since they don't rely on a cmp preceding them.  */
3462   enum attr_type jump_insn_type;
3463 
3464   /* Allow -mdebug-ccfsm to turn this off so we can see how well it does.
3465      We can't do this in macro FINAL_PRESCAN_INSN because it's called from
3466      final_scan_insn which has `optimize' as a local.  */
3467   if (optimize < 2 || TARGET_NO_COND_EXEC)
3468     return;
3469 
3470   /* Ignore notes and labels.  */
3471   if (!INSN_P (insn))
3472     return;
3473   body = PATTERN (insn);
3474   /* If in state 4, check if the target branch is reached, in order to
3475      change back to state 0.  */
3476   if (state->state == 4)
3477     {
3478       if (insn == state->target_insn)
3479 	{
3480 	  state->target_insn = NULL;
3481 	  state->state = 0;
3482 	}
3483       return;
3484     }
3485 
3486   /* If in state 3, it is possible to repeat the trick, if this insn is an
3487      unconditional branch to a label, and immediately following this branch
3488      is the previous target label which is only used once, and the label this
3489      branch jumps to is not too far off.  Or in other words "we've done the
3490      `then' part, see if we can do the `else' part."  */
3491   if (state->state == 3)
3492     {
3493       if (simplejump_p (insn))
3494 	{
3495 	  start_insn = next_nonnote_insn (start_insn);
3496 	  if (GET_CODE (start_insn) == BARRIER)
3497 	    {
3498 	      /* ??? Isn't this always a barrier?  */
3499 	      start_insn = next_nonnote_insn (start_insn);
3500 	    }
3501 	  if (GET_CODE (start_insn) == CODE_LABEL
3502 	      && CODE_LABEL_NUMBER (start_insn) == state->target_label
3503 	      && LABEL_NUSES (start_insn) == 1)
3504 	    reverse = TRUE;
3505 	  else
3506 	    return;
3507 	}
3508       else if (GET_CODE (body) == SIMPLE_RETURN)
3509 	{
3510 	  start_insn = next_nonnote_insn (start_insn);
3511 	  if (GET_CODE (start_insn) == BARRIER)
3512 	    start_insn = next_nonnote_insn (start_insn);
3513 	  if (GET_CODE (start_insn) == CODE_LABEL
3514 	      && CODE_LABEL_NUMBER (start_insn) == state->target_label
3515 	      && LABEL_NUSES (start_insn) == 1)
3516 	    {
3517 	      reverse = TRUE;
3518 	      seeking_return = 1;
3519 	    }
3520 	  else
3521 	    return;
3522 	}
3523       else
3524 	return;
3525     }
3526 
3527   if (GET_CODE (insn) != JUMP_INSN
3528       || GET_CODE (PATTERN (insn)) == ADDR_VEC
3529       || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
3530     return;
3531 
3532   /* We can't predicate BRCC or loop ends.
3533      Also, when generating PIC code, and considering a medium range call,
3534      we can't predicate the call.  */
3535   jump_insn_type = get_attr_type (insn);
3536   if (jump_insn_type == TYPE_BRCC
3537       || jump_insn_type == TYPE_BRCC_NO_DELAY_SLOT
3538       || jump_insn_type == TYPE_LOOP_END
3539       || (jump_insn_type == TYPE_CALL && !get_attr_predicable (insn)))
3540     return;
3541 
3542   /* This jump might be in a PARALLEL with a clobber of the condition
3543      codes; the jump itself should always come first.  */
3544   if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
3545     body = XVECEXP (body, 0, 0);
3546 
3547   if (reverse
3548       || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
3549 	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
3550     {
3551       int insns_skipped = 0, fail = FALSE, succeed = FALSE;
3552       /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
3553       int then_not_else = TRUE;
3554       /* Nonzero if next insn must be the target label.  */
3555       int next_must_be_target_label_p;
3556       rtx_insn *this_insn = start_insn;
3557       rtx label = 0;
3558 
3559       /* Register the insn jumped to.  */
3560       if (reverse)
3561 	{
3562 	  if (!seeking_return)
3563 	    label = XEXP (SET_SRC (body), 0);
3564 	}
3565       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
3566 	label = XEXP (XEXP (SET_SRC (body), 1), 0);
3567       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
3568 	{
3569 	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
3570 	  then_not_else = FALSE;
3571 	}
3572       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == SIMPLE_RETURN)
3573 	seeking_return = 1;
3574       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == SIMPLE_RETURN)
3575 	{
3576 	  seeking_return = 1;
3577 	  then_not_else = FALSE;
3578 	}
3579       else
3580 	gcc_unreachable ();
3581 
3582       /* If this is a non-annulled branch with a delay slot, there is
3583 	 no need to conditionalize the delay slot.  */
3584       if (NEXT_INSN (PREV_INSN (insn)) != insn
3585 	  && state->state == 0 && !INSN_ANNULLED_BRANCH_P (insn))
3586 	{
3587 	  this_insn = NEXT_INSN (this_insn);
3588 	  gcc_assert (NEXT_INSN (NEXT_INSN (PREV_INSN (start_insn)))
3589 		      == NEXT_INSN (this_insn));
3590 	}
3591       /* See how many insns this branch skips, and what kind of insns.  If all
3592 	 insns are okay, and the label or unconditional branch to the same
3593 	 label is not too far away, succeed.  */
3594       for (insns_skipped = 0, next_must_be_target_label_p = FALSE;
3595 	   !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED;
3596 	   insns_skipped++)
3597 	{
3598 	  rtx scanbody;
3599 
3600 	  this_insn = next_nonnote_insn (this_insn);
3601 	  if (!this_insn)
3602 	    break;
3603 
3604 	  if (next_must_be_target_label_p)
3605 	    {
3606 	      if (GET_CODE (this_insn) == BARRIER)
3607 		continue;
3608 	      if (GET_CODE (this_insn) == CODE_LABEL
3609 		  && this_insn == label)
3610 		{
3611 		  state->state = 1;
3612 		  succeed = TRUE;
3613 		}
3614 	      else
3615 		fail = TRUE;
3616 	      break;
3617 	    }
3618 
3619 	  scanbody = PATTERN (this_insn);
3620 
3621 	  switch (GET_CODE (this_insn))
3622 	    {
3623 	    case CODE_LABEL:
3624 	      /* Succeed if it is the target label, otherwise fail since
3625 		 control falls in from somewhere else.  */
3626 	      if (this_insn == label)
3627 		{
3628 		  state->state = 1;
3629 		  succeed = TRUE;
3630 		}
3631 	      else
3632 		fail = TRUE;
3633 	      break;
3634 
3635 	    case BARRIER:
3636 	      /* Succeed if the following insn is the target label.
3637 		 Otherwise fail.
3638 		 If return insns are used then the last insn in a function
3639 		 will be a barrier.  */
3640 	      next_must_be_target_label_p = TRUE;
3641 	      break;
3642 
3643 	    case CALL_INSN:
3644 	      /* Can handle a call insn if there are no insns after it.
3645 		 I.e., the next "insn" is the target label.  We don't have to
3646 		 worry about delay slots as such insns are SEQUENCE's inside
3647 		 INSN's.  ??? It is possible to handle such insns though.  */
3648 	      if (get_attr_cond (this_insn) == COND_CANUSE)
3649 		next_must_be_target_label_p = TRUE;
3650 	      else
3651 		fail = TRUE;
3652 	      break;
3653 
3654 	    case JUMP_INSN:
3655 	      /* If this is an unconditional branch to the same label, succeed.
3656 		 If it is to another label, do nothing.  If it is conditional,
3657 		 fail.  */
3658 	      /* ??? Probably, the test for the SET and the PC are
3659 		 unnecessary.  */
3660 
3661 	      if (GET_CODE (scanbody) == SET
3662 		  && GET_CODE (SET_DEST (scanbody)) == PC)
3663 		{
3664 		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
3665 		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
3666 		    {
3667 		      state->state = 2;
3668 		      succeed = TRUE;
3669 		    }
3670 		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
3671 		    fail = TRUE;
3672 		  else if (get_attr_cond (this_insn) != COND_CANUSE)
3673 		    fail = TRUE;
3674 		}
3675 	      else if (GET_CODE (scanbody) == SIMPLE_RETURN
3676 		       && seeking_return)
3677 		{
3678 		  state->state = 2;
3679 		  succeed = TRUE;
3680 		}
3681 	      else if (GET_CODE (scanbody) == PARALLEL)
3682 		{
3683 		  if (get_attr_cond (this_insn) != COND_CANUSE)
3684 		    fail = TRUE;
3685 		}
3686 	      break;
3687 
3688 	    case INSN:
3689 	      /* We can only do this with insns that can use the condition
3690 		 codes (and don't set them).  */
3691 	      if (GET_CODE (scanbody) == SET
3692 		  || GET_CODE (scanbody) == PARALLEL)
3693 		{
3694 		  if (get_attr_cond (this_insn) != COND_CANUSE)
3695 		    fail = TRUE;
3696 		}
3697 	      /* We can't handle other insns like sequences.  */
3698 	      else
3699 		fail = TRUE;
3700 	      break;
3701 
3702 	    default:
3703 	      break;
3704 	    }
3705 	}
3706 
3707       if (succeed)
3708 	{
3709 	  if ((!seeking_return) && (state->state == 1 || reverse))
3710 	    state->target_label = CODE_LABEL_NUMBER (label);
3711 	  else if (seeking_return || state->state == 2)
3712 	    {
3713 	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
3714 		{
3715 		  this_insn = next_nonnote_insn (this_insn);
3716 
3717 		  gcc_assert (!this_insn ||
3718 			      (GET_CODE (this_insn) != BARRIER
3719 			       && GET_CODE (this_insn) != CODE_LABEL));
3720 		}
3721 	      if (!this_insn)
3722 		{
3723 		  /* Oh dear! we ran off the end, give up.  */
3724 		  extract_insn_cached (insn);
3725 		  state->state = 0;
3726 		  state->target_insn = NULL;
3727 		  return;
3728 		}
3729 	      state->target_insn = this_insn;
3730 	    }
3731 	  else
3732 	    gcc_unreachable ();
3733 
3734 	  /* If REVERSE is true, STATE->cc needs to be inverted from
3735 	     what it was.  */
3736 	  if (!reverse)
3737 	    {
3738 	      state->cond = XEXP (SET_SRC (body), 0);
3739 	      state->cc = get_arc_condition_code (XEXP (SET_SRC (body), 0));
3740 	    }
3741 
3742 	  if (reverse || then_not_else)
3743 	    state->cc = ARC_INVERSE_CONDITION_CODE (state->cc);
3744 	}
3745 
3746       /* Restore recog_operand.  Getting the attributes of other insns can
3747 	 destroy this array, but final.c assumes that it remains intact
3748 	 across this call; since the insn has been recognized already we
3749 	 call insn_extract directly.  */
3750       extract_insn_cached (insn);
3751     }
3752 }
3753 
3754 /* Record that we are currently outputting label NUM with prefix PREFIX.
3755    If it's the label we're looking for, reset the ccfsm machinery.
3756 
3757    Called from ASM_OUTPUT_INTERNAL_LABEL.  */
3758 
3759 static void
3760 arc_ccfsm_at_label (const char *prefix, int num, struct arc_ccfsm *state)
3761 {
3762   if (state->state == 3 && state->target_label == num
3763       && !strcmp (prefix, "L"))
3764     {
3765       state->state = 0;
3766       state->target_insn = NULL;
3767     }
3768 }
3769 
3770 /* We are considering a conditional branch with the condition COND.
3771    Check if we want to conditionalize a delay slot insn, and if so modify
3772    the ccfsm state accordingly.
3773    REVERSE says branch will branch when the condition is false.  */
3774 void
3775 arc_ccfsm_record_condition (rtx cond, bool reverse, rtx_insn *jump,
3776 			    struct arc_ccfsm *state)
3777 {
3778   rtx_insn *seq_insn = NEXT_INSN (PREV_INSN (jump));
3779   if (!state)
3780     state = &arc_ccfsm_current;
3781 
3782   gcc_assert (state->state == 0);
3783   if (seq_insn != jump)
3784     {
3785       rtx insn = XVECEXP (PATTERN (seq_insn), 0, 1);
3786 
3787       if (!as_a<rtx_insn *> (insn)->deleted ()
3788 	  && INSN_ANNULLED_BRANCH_P (jump)
3789 	  && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (insn)))
3790 	{
3791 	  state->cond = cond;
3792 	  state->cc = get_arc_condition_code (cond);
3793 	  if (!reverse)
3794 	    arc_ccfsm_current.cc
3795 	      = ARC_INVERSE_CONDITION_CODE (state->cc);
3796 	  rtx pat = PATTERN (insn);
3797 	  if (GET_CODE (pat) == COND_EXEC)
3798 	    gcc_assert ((INSN_FROM_TARGET_P (insn)
3799 			 ? ARC_INVERSE_CONDITION_CODE (state->cc) : state->cc)
3800 			== get_arc_condition_code (XEXP (pat, 0)));
3801 	  else
3802 	    state->state = 5;
3803 	}
3804     }
3805 }
3806 
3807 /* Update *STATE as we would when we emit INSN.  */
3808 
3809 static void
3810 arc_ccfsm_post_advance (rtx_insn *insn, struct arc_ccfsm *state)
3811 {
3812   enum attr_type type;
3813 
3814   if (LABEL_P (insn))
3815     arc_ccfsm_at_label ("L", CODE_LABEL_NUMBER (insn), state);
3816   else if (JUMP_P (insn)
3817 	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
3818 	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
3819 	   && ((type = get_attr_type (insn)) == TYPE_BRANCH
3820 	       || (type == TYPE_UNCOND_BRANCH
3821 		   /* ??? Maybe should also handle TYPE_RETURN here,
3822 		      but we don't have a testcase for that.  */
3823 		   && ARC_CCFSM_BRANCH_DELETED_P (state))))
3824     {
3825       if (ARC_CCFSM_BRANCH_DELETED_P (state))
3826 	ARC_CCFSM_RECORD_BRANCH_DELETED (state);
3827       else
3828 	{
3829 	  rtx src = SET_SRC (PATTERN (insn));
3830 	  arc_ccfsm_record_condition (XEXP (src, 0), XEXP (src, 1) == pc_rtx,
3831 				      insn, state);
3832 	}
3833     }
3834   else if (arc_ccfsm_current.state == 5)
3835     arc_ccfsm_current.state = 0;
3836 }
3837 
3838 /* Return true if the current insn, which is a conditional branch, is to be
3839    deleted.  */
3840 
3841 bool
3842 arc_ccfsm_branch_deleted_p (void)
3843 {
3844   return ARC_CCFSM_BRANCH_DELETED_P (&arc_ccfsm_current);
3845 }
3846 
3847 /* Record that a branch isn't output because subsequent insns can be
3848    conditionalized.  */
3849 
3850 void
3851 arc_ccfsm_record_branch_deleted (void)
3852 {
3853   ARC_CCFSM_RECORD_BRANCH_DELETED (&arc_ccfsm_current);
3854 }
3855 
3856 /* During insn output, indicate if the current insn is predicated.  */
3857 
3858 bool
3859 arc_ccfsm_cond_exec_p (void)
3860 {
3861   return (cfun->machine->prescan_initialized
3862 	  && ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current));
3863 }
3864 
3865 /* Like next_active_insn, but return NULL if we find an ADDR_(DIFF_)VEC,
3866    and look inside SEQUENCEs.  */
3867 
3868 static rtx_insn *
3869 arc_next_active_insn (rtx_insn *insn, struct arc_ccfsm *statep)
3870 {
3871   rtx pat;
3872 
3873   do
3874     {
3875       if (statep)
3876 	arc_ccfsm_post_advance (insn, statep);
3877       insn = NEXT_INSN (insn);
3878       if (!insn || BARRIER_P (insn))
3879 	return NULL;
3880       if (statep)
3881 	arc_ccfsm_advance (insn, statep);
3882     }
3883   while (NOTE_P (insn)
3884 	 || (cfun->machine->arc_reorg_started
3885 	     && LABEL_P (insn) && !label_to_alignment (insn))
3886 	 || (NONJUMP_INSN_P (insn)
3887 	     && (GET_CODE (PATTERN (insn)) == USE
3888 		 || GET_CODE (PATTERN (insn)) == CLOBBER)));
3889   if (!LABEL_P (insn))
3890     {
3891       gcc_assert (INSN_P (insn));
3892       pat = PATTERN (insn);
3893       if (GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC)
3894 	return NULL;
3895       if (GET_CODE (pat) == SEQUENCE)
3896 	return as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
3897     }
3898   return insn;
3899 }
3900 
3901 /* When deciding if an insn should be output short, we want to know something
3902    about the following insns:
3903    - if another insn follows which we know we can output as a short insn
3904      before an alignment-sensitive point, we can output this insn short:
3905      the decision about the eventual alignment can be postponed.
3906    - if a to-be-aligned label comes next, we should output this insn so
3907      as to get / preserve 4-byte alignment.
3908    - if a likely branch without a delay slot insn, or a call with an
3909      immediately following short insn, comes next, we should output this
3910      insn so as to get / preserve 2 mod 4 unalignment.
3911    - do the same for a not completely unlikely branch with a short insn
3912      following before any other branch / label.
3913    - in order to decide if we are actually looking at a branch, we need to
3914      call arc_ccfsm_advance.
3915    - in order to decide if we are looking at a short insn, we should know
3916      if it is conditionalized.  To a first order of approximation this is
3917      the case if the state from arc_ccfsm_advance from before this insn
3918      indicates the insn is conditionalized.  However, a further refinement
3919      could be to not conditionalize an insn if the destination register(s)
3920      is/are dead in the non-executed case.  */
3921 /* Return non-zero if INSN should be output as a short insn.  UNALIGN is
3922    zero if the current insn is aligned to a 4-byte boundary, two otherwise.
3923    If CHECK_ATTR is greater than 0, check the iscompact attribute first.  */
3924 
3925 int
3926 arc_verify_short (rtx_insn *insn, int, int check_attr)
3927 {
3928   enum attr_iscompact iscompact;
3929   struct machine_function *machine;
3930 
3931   if (check_attr > 0)
3932     {
3933       iscompact = get_attr_iscompact (insn);
3934       if (iscompact == ISCOMPACT_FALSE)
3935 	return 0;
3936     }
3937   machine = cfun->machine;
3938 
3939   if (machine->force_short_suffix >= 0)
3940     return machine->force_short_suffix;
3941 
3942   return (get_attr_length (insn) & 2) != 0;
3943 }
3944 
3945 /* When outputting an instruction (alternative) that can potentially be short,
3946    output the short suffix if the insn is in fact short, and update
3947    cfun->machine->unalign accordingly.  */
3948 
3949 static void
3950 output_short_suffix (FILE *file)
3951 {
3952   rtx_insn *insn = current_output_insn;
3953 
3954   if (arc_verify_short (insn, cfun->machine->unalign, 1))
3955     {
3956       fprintf (file, "_s");
3957       cfun->machine->unalign ^= 2;
3958     }
3959   /* Restore recog_operand.  */
3960   extract_insn_cached (insn);
3961 }
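
/* E.g. (a sketch): an "add" alternative that qualifies is printed as
   "add_s", selecting the 2-byte encoding, and emitting 2 bytes flips
   cfun->machine->unalign between 0 and 2 (mod 4).  */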
3962 
3963 /* Implement FINAL_PRESCAN_INSN.  */
3964 
3965 void
3966 arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
3967 			int noperands ATTRIBUTE_UNUSED)
3968 {
3969   if (TARGET_DUMPISIZE)
3970     fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
3971 
3972   /* Output a nop if necessary to prevent a hazard.
3973      Don't do this for delay slots: inserting a nop would
3974      alter semantics, and the only time we would find a hazard is for a
3975      call function result - and in that case, the hazard is spurious to
3976      start with.  */
3977   if (PREV_INSN (insn)
3978       && PREV_INSN (NEXT_INSN (insn)) == insn
3979       && arc_hazard (prev_real_insn (insn), insn))
3980     {
3981       current_output_insn =
3982 	emit_insn_before (gen_nop (), NEXT_INSN (PREV_INSN (insn)));
3983       final_scan_insn (current_output_insn, asm_out_file, optimize, 1, NULL);
3984       current_output_insn = insn;
3985     }
3986   /* Restore extraction data which might have been clobbered by arc_hazard.  */
3987   extract_constrain_insn_cached (insn);
3988 
3989   if (!cfun->machine->prescan_initialized)
3990     {
3991       /* Clear lingering state from branch shortening.  */
3992       memset (&arc_ccfsm_current, 0, sizeof arc_ccfsm_current);
3993       cfun->machine->prescan_initialized = 1;
3994     }
3995   arc_ccfsm_advance (insn, &arc_ccfsm_current);
3996 
3997   cfun->machine->size_reason = 0;
3998 }
3999 
4000 /* Given FROM and TO register numbers, say whether this elimination is allowed.
4001    Frame pointer elimination is automatically handled.
4002 
4003    All eliminations are permissible. If we need a frame
4004    pointer, we must eliminate ARG_POINTER_REGNUM into
4005    FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
4006 
4007 static bool
4008 arc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
4009 {
4010   return to == FRAME_POINTER_REGNUM || !arc_frame_pointer_required ();
4011 }
4012 
4013 /* Define the offset between two registers, one to be eliminated, and
4014    the other its replacement, at the start of a routine.  */
4015 
4016 int
4017 arc_initial_elimination_offset (int from, int to)
4018 {
4019   if (! cfun->machine->frame_info.initialized)
4020     arc_compute_frame_size (get_frame_size ());
4021 
4022   if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
4023     {
4024       return (cfun->machine->frame_info.extra_size
4025 	      + cfun->machine->frame_info.reg_size);
4026     }
4027 
4028   if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4029     {
4030       return (cfun->machine->frame_info.total_size
4031 	      - cfun->machine->frame_info.pretend_size);
4032     }
4033 
4034   if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4035     {
4036       return (cfun->machine->frame_info.total_size
4037 	      - (cfun->machine->frame_info.pretend_size
4038 		 + cfun->machine->frame_info.extra_size
4039 		 + cfun->machine->frame_info.reg_size));
4040     }
4041 
4042   gcc_unreachable ();
4043 }
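
/* A sketch of the frame layout these offsets assume (stack grows
   downward; field names are those of struct arc_frame_info):

	arg pointer ->	incoming args
			pretend_size
			extra_size	(return address etc.)
			reg_size	(callee-saved registers)
	frame pointer ->
			locals / outgoing args
	stack pointer ->

   hence AP->FP = extra_size + reg_size, and
   AP->SP = total_size - pretend_size.  */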
4044 
4045 static bool
4046 arc_frame_pointer_required (void)
4047 {
4048   return cfun->calls_alloca;
4049 }
4050 
4052 /* Return the destination address of a branch.  */
4053 
4054 int
4055 branch_dest (rtx branch)
4056 {
4057   rtx pat = PATTERN (branch);
4058   rtx dest = (GET_CODE (pat) == PARALLEL
4059 	      ? SET_SRC (XVECEXP (pat, 0, 0)) : SET_SRC (pat));
4060   int dest_uid;
4061 
4062   if (GET_CODE (dest) == IF_THEN_ELSE)
4063     dest = XEXP (dest, XEXP (dest, 1) == pc_rtx ? 2 : 1);
4064 
4065   dest = XEXP (dest, 0);
4066   dest_uid = INSN_UID (dest);
4067 
4068   return INSN_ADDRESSES (dest_uid);
4069 }
4070 
4072 /* Implement TARGET_ENCODE_SECTION_INFO hook.  */
4073 
4074 static void
4075 arc_encode_section_info (tree decl, rtx rtl, int first)
4076 {
4077   /* For sdata, SYMBOL_FLAG_LOCAL and SYMBOL_FLAG_FUNCTION.
4078      This clears machine specific flags, so has to come first.  */
4079   default_encode_section_info (decl, rtl, first);
4080 
4081   /* Check if it is a function, and whether it has the
4082      [long/medium/short]_call attribute specified.  */
4083   if (TREE_CODE (decl) == FUNCTION_DECL)
4084     {
4085       rtx symbol = XEXP (rtl, 0);
4086       int flags = SYMBOL_REF_FLAGS (symbol);
4087 
4088       tree attr = (TREE_TYPE (decl) != error_mark_node
4089 		   ? TYPE_ATTRIBUTES (TREE_TYPE (decl)) : NULL_TREE);
4090       tree long_call_attr = lookup_attribute ("long_call", attr);
4091       tree medium_call_attr = lookup_attribute ("medium_call", attr);
4092       tree short_call_attr = lookup_attribute ("short_call", attr);
4093 
4094       if (long_call_attr != NULL_TREE)
4095 	flags |= SYMBOL_FLAG_LONG_CALL;
4096       else if (medium_call_attr != NULL_TREE)
4097 	flags |= SYMBOL_FLAG_MEDIUM_CALL;
4098       else if (short_call_attr != NULL_TREE)
4099 	flags |= SYMBOL_FLAG_SHORT_CALL;
4100 
4101       SYMBOL_REF_FLAGS (symbol) = flags;
4102     }
4103 }
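
/* For instance (hypothetical declaration), something like

     void foo (void) __attribute__ ((long_call));

   reaches this hook with a "long_call" attribute on its function type,
   so foo's SYMBOL_REF gets SYMBOL_FLAG_LONG_CALL set here.  */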
4104 
4105 /* This is how to output a definition of an internal numbered label where
4106    PREFIX is the class of label and NUM is the number within the class.  */
4107 
4108 static void
arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
4109 {
4110   if (cfun)
4111     arc_ccfsm_at_label (prefix, labelno, &arc_ccfsm_current);
4112   default_internal_label (stream, prefix, labelno);
4113 }
4114 
4115 /* Set the cpu type and print out other fancy things,
4116    at the top of the file.  */
4117 
4118 static void
arc_file_start (void)
4119 {
4120   default_file_start ();
4121   fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string);
4122 }
4123 
4124 /* Cost functions.  */
4125 
4126 /* Compute a (partial) cost for rtx X.  Return true if the complete
4127    cost has been computed, and false if subexpressions should be
4128    scanned.  In either case, *TOTAL contains the cost result.  */
4129 
4130 static bool
4131 arc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
4132 	       int *total, bool speed)
4133 {
4134   switch (code)
4135     {
4136       /* Small integers are as cheap as registers.  */
4137     case CONST_INT:
4138       {
4139 	bool nolimm = false; /* Can we do without long immediate?  */
4140 	bool fast = false; /* Is the result available immediately?  */
4141 	bool condexec = false; /* Does this allow conditional execution?  */
4142 	bool compact = false; /* Is a 16 bit opcode available?  */
4143 	/* CONDEXEC also implies that we can have an unconditional
4144 	   3-address operation.  */
4145 
4146 	nolimm = compact = condexec = false;
4147 	if (UNSIGNED_INT6 (INTVAL (x)))
4148 	  nolimm = condexec = compact = true;
4149 	else
4150 	  {
4151 	    if (SMALL_INT (INTVAL (x)))
4152 	      nolimm = fast = true;
4153 	    switch (outer_code)
4154 	      {
4155 	      case AND: /* bclr, bmsk, ext[bw] */
4156 		if (satisfies_constraint_Ccp (x) /* bclr */
4157 		    || satisfies_constraint_C1p (x) /* bmsk */)
4158 		  nolimm = fast = condexec = compact = true;
4159 		break;
4160 	      case IOR: /* bset */
4161 		if (satisfies_constraint_C0p (x)) /* bset */
4162 		  nolimm = fast = condexec = compact = true;
4163 		break;
4164 	      case XOR:
4165 		if (satisfies_constraint_C0p (x)) /* bxor */
4166 		  nolimm = fast = condexec = true;
4167 		break;
4168 	      case SET:
4169 		if (satisfies_constraint_Crr (x)) /* ror b,u6 */
4170 		  nolimm = true;
4171 	      default:
4172 		break;
4173 	      }
4174 	  }
4175 	/* FIXME: Add target options to attach a small cost if
4176 	   condexec / compact is not true.  */
4177 	if (nolimm)
4178 	  {
4179 	    *total = 0;
4180 	    return true;
4181 	  }
4182       }
4183       /* FALLTHRU */
4184 
4185       /*  4 byte values can be fetched as immediate constants -
4186 	  let's give that the cost of an extra insn.  */
4187     case CONST:
4188     case LABEL_REF:
4189     case SYMBOL_REF:
4190       *total = COSTS_N_INSNS (1);
4191       return true;
4192 
4193     case CONST_DOUBLE:
4194       {
4195 	rtx high, low;
4196 
4197 	if (TARGET_DPFP)
4198 	  {
4199 	    *total = COSTS_N_INSNS (1);
4200 	    return true;
4201 	  }
4202 	/* FIXME: correct the order of high,low */
4203 	split_double (x, &high, &low);
4204 	*total = COSTS_N_INSNS (!SMALL_INT (INTVAL (high))
4205 				+ !SMALL_INT (INTVAL (low)));
4206 	return true;
4207       }
4208 
4209     /* Encourage synth_mult to find a synthetic multiply when reasonable.
4210        If we need more than 12 insns to do a multiply, then go out-of-line,
4211        since the call overhead will be < 10% of the cost of the multiply.  */
4212     case ASHIFT:
4213     case ASHIFTRT:
4214     case LSHIFTRT:
4215       if (TARGET_BARREL_SHIFTER)
4216 	{
4217 	  /* If we want to shift a constant, we need a LIMM.  */
4218 	  /* ??? when the optimizers want to know if a constant should be
4219 	     hoisted, they ask for the cost of the constant.  OUTER_CODE is
4220 	     insufficient context for shifts since we don't know which operand
4221 	     we are looking at.  */
4222 	  if (CONSTANT_P (XEXP (x, 0)))
4223 	    {
4224 	      *total += (COSTS_N_INSNS (2)
4225 			 + rtx_cost (XEXP (x, 1), (enum rtx_code) code, 0, speed));
4226 	      return true;
4227 	    }
4228 	  *total = COSTS_N_INSNS (1);
4229 	}
4230       else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4231 	*total = COSTS_N_INSNS (16);
4232       else
4233 	{
4234 	  *total = COSTS_N_INSNS (INTVAL (XEXP (x, 1)));
4235 	  /* ??? want_to_gcse_p can throw negative shift counts at us,
4236 	     and then panics when it gets a negative cost as result.
4237 	     Seen for gcc.c-torture/compile/20020710-1.c -Os .  */
4238 	  if (*total < 0)
4239 	    *total = 0;
4240 	}
4241       return false;
4242 
4243     case DIV:
4244     case UDIV:
4245       if (speed)
4246 	*total = COSTS_N_INSNS (30);
4247       else
4248 	*total = COSTS_N_INSNS (1);
4249       return false;
4250 
4251     case MULT:
4252       if ((TARGET_DPFP && GET_MODE (x) == DFmode))
4253 	*total = COSTS_N_INSNS (1);
4254       else if (speed)
4255 	*total = arc_multcost;
4256       /* We do not want synth_mult sequences when optimizing
4257 	 for size.  */
4258       else if (TARGET_MUL64_SET || (TARGET_ARC700 && !TARGET_NOMPY_SET))
4259 	*total = COSTS_N_INSNS (1);
4260       else
4261 	*total = COSTS_N_INSNS (2);
4262       return false;
4263     case PLUS:
4264       if (GET_CODE (XEXP (x, 0)) == MULT
4265 	  && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
4266 	{
4267 	  *total += (rtx_cost (XEXP (x, 1), PLUS, 0, speed)
4268 		     + rtx_cost (XEXP (XEXP (x, 0), 0), PLUS, 1, speed));
4269 	  return true;
4270 	}
4271       return false;
4272     case MINUS:
4273       if (GET_CODE (XEXP (x, 1)) == MULT
4274 	  && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode))
4275 	{
4276 	  *total += (rtx_cost (XEXP (x, 0), PLUS, 0, speed)
4277 		     + rtx_cost (XEXP (XEXP (x, 1), 0), PLUS, 1, speed));
4278 	  return true;
4279 	}
4280       return false;
4281     case COMPARE:
4282       {
4283 	rtx op0 = XEXP (x, 0);
4284 	rtx op1 = XEXP (x, 1);
4285 
4286 	if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
4287 	    && XEXP (op0, 1) == const1_rtx)
4288 	  {
4289 	    /* btst / bbit0 / bbit1:
4290 	       Small integers and registers are free; everything else can
4291 	       be put in a register.  */
4292 	    *total = (rtx_cost (XEXP (op0, 0), SET, 1, speed)
4293 		      + rtx_cost (XEXP (op0, 2), SET, 1, speed));
4294 	    return true;
4295 	  }
4296 	if (GET_CODE (op0) == AND && op1 == const0_rtx
4297 	    && satisfies_constraint_C1p (XEXP (op0, 1)))
4298 	  {
4299 	    /* bmsk.f */
4300 	    *total = rtx_cost (XEXP (op0, 0), SET, 1, speed);
4301 	    return true;
4302 	  }
4303 	/* add.f  */
4304 	if (GET_CODE (op1) == NEG)
4305 	  {
4306 	    /* op0 might be constant; the inside of op1 is rather
4307 	       unlikely to be so.  So swapping the operands might lower
4308 	       the cost.  */
4309 	    *total = (rtx_cost (op0, PLUS, 1, speed)
4310 		      + rtx_cost (XEXP (op1, 0), PLUS, 0, speed));
4311 	  }
4312 	return false;
4313       }
4314     case EQ: case NE:
4315       if (outer_code == IF_THEN_ELSE
4316 	  && GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
4317 	  && XEXP (x, 1) == const0_rtx
4318 	  && XEXP (XEXP (x, 0), 1) == const1_rtx)
4319 	{
4320 	  /* btst / bbit0 / bbit1:
4321 	     Small integers and registers are free; everything else can
4322 	     be put in a register.  */
4323 	  rtx op0 = XEXP (x, 0);
4324 
4325 	  *total = (rtx_cost (XEXP (op0, 0), SET, 1, speed)
4326 		    + rtx_cost (XEXP (op0, 2), SET, 1, speed));
4327 	  return true;
4328 	}
4329       /* Fall through.  */
4330     /* scc_insn expands into two insns.  */
4331     case GTU: case GEU: case LEU:
4332       if (GET_MODE (x) == SImode)
4333 	*total += COSTS_N_INSNS (1);
4334       return false;
4335     case LTU: /* might use adc.  */
4336       if (GET_MODE (x) == SImode)
4337 	*total += COSTS_N_INSNS (1) - 1;
4338       return false;
4339     default:
4340       return false;
4341     }
4342 }
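
/* Worked example (a sketch): costing (ashift (reg) (const_int 3))
   without a barrel shifter takes the CONST_INT branch above and yields
   COSTS_N_INSNS (3) -- one single-bit shift per position.  */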
4343 
4344 /* Return true if ADDR is an address that needs to be expressed as an
4345    explicit sum of pcl + offset.  */
4346 
4347 bool
4348 arc_legitimate_pc_offset_p (rtx addr)
4349 {
4350   if (GET_CODE (addr) != CONST)
4351     return false;
4352   addr = XEXP (addr, 0);
4353   if (GET_CODE (addr) == PLUS)
4354     {
4355       if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
4356 	return false;
4357       addr = XEXP (addr, 0);
4358     }
4359   return (GET_CODE (addr) == UNSPEC
4360 	  && XVECLEN (addr, 0) == 1
4361 	  && XINT (addr, 1) == ARC_UNSPEC_GOT
4362 	  && GET_CODE (XVECEXP (addr, 0, 0)) == SYMBOL_REF);
4363 }
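
/* E.g. (a sketch) the RTL

     (const (plus (unspec [(symbol_ref "foo")] ARC_UNSPEC_GOT)
		  (const_int 4)))

   satisfies this predicate and must be emitted as an explicit
   pcl + offset sum.  */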
4364 
4365 /* Return true if ADDR is a valid pic address.
4366    A valid pic address on arc should look like
4367    const (unspec (SYMBOL_REF/LABEL) (ARC_UNSPEC_GOTOFF/ARC_UNSPEC_GOT))  */
4368 
4369 bool
4370 arc_legitimate_pic_addr_p (rtx addr)
4371 {
4372   if (GET_CODE (addr) == LABEL_REF)
4373     return true;
4374   if (GET_CODE (addr) != CONST)
4375     return false;
4376 
4377   addr = XEXP (addr, 0);
4378 
4380   if (GET_CODE (addr) == PLUS)
4381     {
4382       if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
4383 	return false;
4384       addr = XEXP (addr, 0);
4385     }
4386 
4387   if (GET_CODE (addr) != UNSPEC
4388       || XVECLEN (addr, 0) != 1)
4389     return false;
4390 
4391   /* Must be @GOT or @GOTOFF.  */
4392   if (XINT (addr, 1) != ARC_UNSPEC_GOT
4393       && XINT (addr, 1) != ARC_UNSPEC_GOTOFF)
4394     return false;
4395 
4396   if (GET_CODE (XVECEXP (addr, 0, 0)) != SYMBOL_REF
4397       && GET_CODE (XVECEXP (addr, 0, 0)) != LABEL_REF)
4398     return false;
4399 
4400   return true;
4401 }
4402 
4405 /* Return true if OP contains a symbol reference.  */
4406 
4407 static bool
4408 symbolic_reference_mentioned_p (rtx op)
4409 {
4410   register const char *fmt;
4411   register int i;
4412 
4413   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4414     return true;
4415 
4416   fmt = GET_RTX_FORMAT (GET_CODE (op));
4417   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4418     {
4419       if (fmt[i] == 'E')
4420 	{
4421 	  register int j;
4422 
4423 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4424 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4425 	      return true;
4426 	}
4427 
4428       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4429 	return true;
4430     }
4431 
4432   return false;
4433 }
4434 
4435 /* Return true if OP contains a SYMBOL_REF that is not wrapped in an unspec.
4436    If SKIP_LOCAL is true, skip symbols that bind locally.
4437    This is used further down in this file, and, without SKIP_LOCAL,
4438    in the addsi3 / subsi3 expanders when generating PIC code.  */
4439 
4440 bool
4441 arc_raw_symbolic_reference_mentioned_p (rtx op, bool skip_local)
4442 {
4443   register const char *fmt;
4444   register int i;
4445 
4446   if (GET_CODE (op) == UNSPEC)
4447     return false;
4448 
4449   if (GET_CODE (op) == SYMBOL_REF)
4450     {
4451       tree decl = SYMBOL_REF_DECL (op);
4452       return !skip_local || !decl || !default_binds_local_p (decl);
4453     }
4454 
4455   fmt = GET_RTX_FORMAT (GET_CODE (op));
4456   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4457     {
4458       if (fmt[i] == 'E')
4459 	{
4460 	  register int j;
4461 
4462 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4463 	    if (arc_raw_symbolic_reference_mentioned_p (XVECEXP (op, i, j),
4464 							skip_local))
4465 	      return true;
4466 	}
4467 
4468       else if (fmt[i] == 'e'
4469 	       && arc_raw_symbolic_reference_mentioned_p (XEXP (op, i),
4470 							  skip_local))
4471 	return true;
4472     }
4473 
4474   return false;
4475 }
4476 
4477 /* Legitimize a pic address reference in ORIG.
4478    The return value is the legitimized address.
4479    If OLDX is non-zero, it is the target to assign the address to first.  */
4480 
4481 rtx
4482 arc_legitimize_pic_address (rtx orig, rtx oldx)
4483 {
4484   rtx addr = orig;
4485   rtx pat = orig;
4486   rtx base;
4487 
4488   if (oldx == orig)
4489     oldx = NULL;
4490 
4491   if (GET_CODE (addr) == LABEL_REF)
4492     ; /* Do nothing.  */
4493   else if (GET_CODE (addr) == SYMBOL_REF
4494 	   && (CONSTANT_POOL_ADDRESS_P (addr)
4495 	       || SYMBOL_REF_LOCAL_P (addr)))
4496     {
4497       /* This symbol may be referenced via a displacement from the PIC
4498 	 base address (@GOTOFF).  */
4499 
4500       /* FIXME: if we had a way to emit pc-relative adds that don't
4501 	 create a GOT entry, we could do without the use of the gp register.  */
4502       crtl->uses_pic_offset_table = 1;
4503       pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOTOFF);
4504       pat = gen_rtx_CONST (Pmode, pat);
4505       pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat);
4506 
4507       if (oldx == NULL)
4508 	oldx = gen_reg_rtx (Pmode);
4509 
4512       emit_move_insn (oldx, pat);
4513       pat = oldx;
4515 
4516     }
4517   else if (GET_CODE (addr) == SYMBOL_REF)
4518     {
4519       /* This symbol must be referenced via a load from the
4520 	 Global Offset Table (@GOTPC).  */
4521 
4522       pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOT);
4523       pat = gen_rtx_CONST (Pmode, pat);
4524       pat = gen_const_mem (Pmode, pat);
4525 
4526       if (oldx == 0)
4527 	oldx = gen_reg_rtx (Pmode);
4528 
4529       emit_move_insn (oldx, pat);
4530       pat = oldx;
4531     }
4532   else
4533     {
4534       if (GET_CODE (addr) == CONST)
4535 	{
4536 	  addr = XEXP (addr, 0);
4537 	  if (GET_CODE (addr) == UNSPEC)
4538 	    {
4539 	      /* Check that the unspec is one of the ones we generate?  */
4540 	    }
4541 	  else
4542 	    gcc_assert (GET_CODE (addr) == PLUS);
4543 	}
4544 
4545       if (GET_CODE (addr) == PLUS)
4546 	{
4547 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4548 
4549 	  /* Check first to see if this is a constant offset from a @GOTOFF
4550 	     symbol reference.  */
4551 	  if ((GET_CODE (op0) == LABEL_REF
4552 	       || (GET_CODE (op0) == SYMBOL_REF
4553 		   && (CONSTANT_POOL_ADDRESS_P (op0)
4554 		       || SYMBOL_REF_LOCAL_P (op0))))
4555 	      && GET_CODE (op1) == CONST_INT)
4556 	    {
4557 	      /* FIXME: like above, could do without gp reference.  */
4558 	      crtl->uses_pic_offset_table = 1;
4559 	      pat
4560 		= gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), ARC_UNSPEC_GOTOFF);
4561 	      pat = gen_rtx_PLUS (Pmode, pat, op1);
4562 	      pat = gen_rtx_CONST (Pmode, pat);
4563 	      pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat);
4564 
4565 	      if (oldx != 0)
4566 		{
4567 		  emit_move_insn (oldx, pat);
4568 		  pat = oldx;
4569 		}
4570 	    }
4571 	  else
4572 	    {
4573 	      base = arc_legitimize_pic_address (XEXP (addr, 0), oldx);
4574 	      pat  = arc_legitimize_pic_address (XEXP (addr, 1),
4575 					     base == oldx ? NULL_RTX : oldx);
4576 
4577 	      if (GET_CODE (pat) == CONST_INT)
4578 		pat = plus_constant (Pmode, base, INTVAL (pat));
4579 	      else
4580 		{
4581 		  if (GET_CODE (pat) == PLUS && CONSTANT_P (XEXP (pat, 1)))
4582 		    {
4583 		      base = gen_rtx_PLUS (Pmode, base, XEXP (pat, 0));
4584 		      pat = XEXP (pat, 1);
4585 		    }
4586 		  pat = gen_rtx_PLUS (Pmode, base, pat);
4587 		}
4588 	    }
4589 	}
4590     }
4591 
4592   return pat;
4593 }
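
/* E.g. (a sketch): for a local symbol "foo" the result is

     (plus pic_offset_table_rtx
	   (const (unspec [(symbol_ref "foo")] ARC_UNSPEC_GOTOFF)))

   moved into a register, while a non-local symbol instead becomes a
   load from its GOT slot (the @GOTPC case above).  */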
4594 
4595 /* Output address constant X to FILE, taking PIC into account.  */
4596 
4597 void
4598 arc_output_pic_addr_const (FILE * file, rtx x, int code)
4599 {
4600   char buf[256];
4601 
4602  restart:
4603   switch (GET_CODE (x))
4604     {
4605     case PC:
4606       if (flag_pic)
4607 	putc ('.', file);
4608       else
4609 	gcc_unreachable ();
4610       break;
4611 
4612     case SYMBOL_REF:
4613       output_addr_const (file, x);
4614 
4615       /* Local functions do not get references through the PLT.  */
4616       if (code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
4617 	fputs ("@plt", file);
4618       break;
4619 
4620     case LABEL_REF:
4621       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0)));
4622       assemble_name (file, buf);
4623       break;
4624 
4625     case CODE_LABEL:
4626       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
4627       assemble_name (file, buf);
4628       break;
4629 
4630     case CONST_INT:
4631       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4632       break;
4633 
4634     case CONST:
4635       arc_output_pic_addr_const (file, XEXP (x, 0), code);
4636       break;
4637 
4638     case CONST_DOUBLE:
4639       if (GET_MODE (x) == VOIDmode)
4640 	{
4641 	  /* We can use %d if the number is one word and positive.  */
4642 	  if (CONST_DOUBLE_HIGH (x))
4643 	    fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
4644 		     CONST_DOUBLE_HIGH (x), CONST_DOUBLE_LOW (x));
4645 	  else if  (CONST_DOUBLE_LOW (x) < 0)
4646 	    fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x));
4647 	  else
4648 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
4649 	}
4650       else
4651 	/* We can't handle floating point constants;
4652 	   PRINT_OPERAND must handle them.  */
4653 	output_operand_lossage ("floating constant misused");
4654       break;
4655 
4656     case PLUS:
4657       /* FIXME: Not needed here.  */
4658       /* Some assemblers need integer constants to appear last (eg masm).  */
4659       if (GET_CODE (XEXP (x, 0)) == CONST_INT)
4660 	{
4661 	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
4662 	  fprintf (file, "+");
4663 	  arc_output_pic_addr_const (file, XEXP (x, 0), code);
4664 	}
4665       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4666 	{
4667 	  arc_output_pic_addr_const (file, XEXP (x, 0), code);
4668 	  if (INTVAL (XEXP (x, 1)) >= 0)
4669 	    fprintf (file, "+");
4670 	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
4671 	}
4672       else
4673 	gcc_unreachable ();
4674       break;
4675 
4676     case MINUS:
4677       /* Avoid outputting things like x-x or x+5-x,
4678 	 since some assemblers can't handle that.  */
4679       x = simplify_subtraction (x);
4680       if (GET_CODE (x) != MINUS)
4681 	goto restart;
4682 
4683       arc_output_pic_addr_const (file, XEXP (x, 0), code);
4684       fprintf (file, "-");
4685       if (GET_CODE (XEXP (x, 1)) == CONST_INT
4686 	  && INTVAL (XEXP (x, 1)) < 0)
4687 	{
4688 	  fprintf (file, "(");
4689 	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
4690 	  fprintf (file, ")");
4691 	}
4692       else
4693 	arc_output_pic_addr_const (file, XEXP (x, 1), code);
4694       break;
4695 
4696     case ZERO_EXTEND:
4697     case SIGN_EXTEND:
4698       arc_output_pic_addr_const (file, XEXP (x, 0), code);
4699       break;
4700 
4702     case UNSPEC:
4703       gcc_assert (XVECLEN (x, 0) == 1);
4704       if (XINT (x, 1) == ARC_UNSPEC_GOT)
4705 	fputs ("pcl,", file);
4706       arc_output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
4707       switch (XINT (x, 1))
4708 	{
4709 	case ARC_UNSPEC_GOT:
4710 	  fputs ("@gotpc", file);
4711 	  break;
4712 	case ARC_UNSPEC_GOTOFF:
4713 	  fputs ("@gotoff", file);
4714 	  break;
4715 	case ARC_UNSPEC_PLT:
4716 	  fputs ("@plt", file);
4717 	  break;
4718 	default:
4719 	  output_operand_lossage ("invalid UNSPEC as operand: %d", XINT (x,1));
4720 	  break;
4721 	}
4722       break;
4723 
4724     default:
4725       output_operand_lossage ("invalid expression as operand");
4726     }
4727 }
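
/* Sketch of the spellings the UNSPEC case above yields: a GOT load of
   "foo" prints as "pcl,foo@gotpc", a GOT-offset reference as
   "foo@gotoff", and a PLT call target as "foo@plt".  */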
4728 
4729 #define SYMBOLIC_CONST(X)	\
4730 (GET_CODE (X) == SYMBOL_REF						\
4731  || GET_CODE (X) == LABEL_REF						\
4732  || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
4733 
4734 /* Emit insns to move operands[1] into operands[0].  */
4735 
4736 void
4737 emit_pic_move (rtx *operands, machine_mode)
4738 {
4739   rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
4740 
4741   if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
4742     operands[1] = force_reg (Pmode, operands[1]);
4743   else
4744     operands[1] = arc_legitimize_pic_address (operands[1], temp);
4745 }
4746 
4747 
4748 /* This function returns the number of words at the beginning of an
4749    argument that must be put in registers.  The returned value must be
4750    zero for arguments that are passed entirely in registers or that
4751    are entirely pushed on the stack.
4752 
4753    On some machines, certain arguments must be passed partially in
4754    registers and partially in memory.  On these machines, typically
4755    the first N words of arguments are passed in registers, and the
4756    rest on the stack.  If a multi-word argument (a `double' or a
4757    structure) crosses that boundary, its first few words must be
4758    passed in registers and the rest must be pushed.  This function
4759    tells the compiler when this occurs, and how many of the words
4760    should go in registers.
4761 
4762    `FUNCTION_ARG' for these arguments should return the first register
4763    to be used by the caller for this argument; likewise
4764    `FUNCTION_INCOMING_ARG', for the called function.
4765 
4766    The function is used to implement macro FUNCTION_ARG_PARTIAL_NREGS.  */
4767 
4768 /* If REGNO is the lowest-numbered arg reg still available, return how
4769    many arg regs remain available.  */
4770 #define GPR_REST_ARG_REGS(REGNO) \
4771   ((REGNO) <= MAX_ARC_PARM_REGS ? MAX_ARC_PARM_REGS - (REGNO) : 0 )
4772 
4773 /* ARC parm regs are contiguous, so the next arg reg follows directly.  */
4774 #define ARC_NEXT_ARG_REG(REGNO) ( (REGNO) + 1 )
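
/* Illustration (not part of the build): assuming, purely for example,
   that MAX_ARC_PARM_REGS is 8, GPR_REST_ARG_REGS (6) yields 2 (two
   argument registers still free), while GPR_REST_ARG_REGS (9) yields 0,
   i.e. everything from that argument on is passed on the stack.  */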
4775 
4776 /* Implement TARGET_ARG_PARTIAL_BYTES.  */
4777 
4778 static int
4779 arc_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4780 		       tree type, bool named ATTRIBUTE_UNUSED)
4781 {
4782   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4783   int bytes = (mode == BLKmode
4784 	       ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
4785   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4786   int arg_num = *cum;
4787   int ret;
4788 
4789   arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
4790   ret = GPR_REST_ARG_REGS (arg_num);
4791 
4792   /* ICEd at function.c:2361, and ret is copied to data->partial.  */
4793   ret = (ret >= words ? 0 : ret * UNITS_PER_WORD);
4794 
4795   return ret;
4796 }
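
/* Worked example for the above (values assumed, not part of the build):
   if two argument registers remain when a four-word argument arrives,
   GPR_REST_ARG_REGS gives 2, and since 2 < 4 the hook reports
   2 * UNITS_PER_WORD partial bytes; the remaining two words go on the
   stack.  With four or more registers free it reports 0, meaning the
   argument is passed entirely in registers.  */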
4797 
4798 
4799 
4800 /* This function controls whether a function argument is passed in a
4801    register, and if so, which register.
4802 
4803    The arguments are CUM, of type CUMULATIVE_ARGS, which summarizes
4804    (in a way defined by INIT_CUMULATIVE_ARGS and FUNCTION_ARG_ADVANCE)
4805    all of the previous arguments so far passed in registers; MODE, the
4806    machine mode of the argument; TYPE, the data type of the argument
4807    as a tree node or 0 if that is not known (which happens for C
4808    support library functions); and NAMED, which is 1 for an ordinary
4809    argument and 0 for nameless arguments that correspond to `...' in
4810    the called function's prototype.
4811 
4812    The returned value should either be a `reg' RTX for the hard
4813    register in which to pass the argument, or zero to pass the
4814    argument on the stack.
4815 
4816    For machines like the Vax and 68000, where normally all arguments
4817    are pushed, zero suffices as a definition.
4818 
4819    The usual way to make the ANSI library `stdarg.h' work on a machine
4820    where some arguments are usually passed in registers is to cause
4821    nameless arguments to be passed on the stack instead.  This is done
4822    by making the function return 0 whenever NAMED is 0.
4823 
4824    You may use the macro `MUST_PASS_IN_STACK (MODE, TYPE)' in the
4825    definition of this function to determine if this argument is of a
4826    type that must be passed in the stack.  If `REG_PARM_STACK_SPACE'
4827    is not defined and the function returns non-zero for such an
4828    argument, the compiler will abort.  If `REG_PARM_STACK_SPACE' is
4829    defined, the argument will be computed in the stack and then loaded
4830    into a register.
4831 
4832    The function is used to implement macro FUNCTION_ARG.  */
4833 /* On the ARC the first MAX_ARC_PARM_REGS args are normally in registers
4834    and the rest are pushed.  */
4835 
4836 static rtx
4837 arc_function_arg (cumulative_args_t cum_v, machine_mode mode,
4838 		  const_tree type ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED)
4839 {
4840   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4841   int arg_num = *cum;
4842   rtx ret;
4843   const char *debstr ATTRIBUTE_UNUSED;
4844 
4845   arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
4846   /* Return a marker for use in the call instruction.  */
4847   if (mode == VOIDmode)
4848     {
4849       ret = const0_rtx;
4850       debstr = "<0>";
4851     }
4852   else if (GPR_REST_ARG_REGS (arg_num) > 0)
4853     {
4854       ret = gen_rtx_REG (mode, arg_num);
4855       debstr = reg_names [arg_num];
4856     }
4857   else
4858     {
4859       ret = NULL_RTX;
4860       debstr = "memory";
4861     }
4862   return ret;
4863 }
4864 
4865 /* The function to update the summarizer variable *CUM to advance past
4866    an argument in the argument list.  The values MODE, TYPE and NAMED
4867    describe that argument.  Once this is done, the variable *CUM is
4868    suitable for analyzing the *following* argument with
4869    `FUNCTION_ARG', etc.
4870 
4871    This function need not do anything if the argument in question was
4872    passed on the stack.  The compiler knows how to track the amount of
4873    stack space used for arguments without any special help.
4874 
4875    The function is used to implement macro FUNCTION_ARG_ADVANCE.  */
4876 /* For the ARC: the cum set here is passed on to function_arg, where we
4877    look at its value and say which reg to use.  Strategy: advance the
4878    reg number here until we run out of arg regs, then set *cum past the
4879    last reg.  In function_arg, since *cum > last arg reg, we would
4880    return 0 and thus the arg will end up on the stack.  For straddling
4881    args, of course, function_arg_partial_nregs comes into play.  */
4882 
4883 static void
4884 arc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
4885 			  const_tree type, bool named ATTRIBUTE_UNUSED)
4886 {
4887   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4888   int bytes = (mode == BLKmode
4889 	       ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
4890   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4891   int i;
4892 
4893   if (words)
4894     *cum = ROUND_ADVANCE_CUM (*cum, mode, type);
4895   for (i = 0; i < words; i++)
4896     *cum = ARC_NEXT_ARG_REG (*cum);
4897 
4898 }
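
/* Illustration (not part of the build): assuming word-sized argument
   registers starting at 0, advancing past an int moves *cum from 0 to 1;
   advancing past a 7-byte BLKmode struct (two words) then moves it from
   1 to 3, after any alignment rounding done by ROUND_ADVANCE_CUM.  */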
4899 
4900 /* Define how to find the value returned by a function.
4901    VALTYPE is the data type of the value (as a tree).
4902    If the precise function being called is known, FN_DECL_OR_TYPE is its
4903    FUNCTION_DECL; otherwise, FN_DECL_OR_TYPE is its type.  */
4904 
4905 static rtx
4906 arc_function_value (const_tree valtype,
4907 		    const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
4908 		    bool outgoing ATTRIBUTE_UNUSED)
4909 {
4910   machine_mode mode = TYPE_MODE (valtype);
4911   int unsignedp ATTRIBUTE_UNUSED;
4912 
4913   unsignedp = TYPE_UNSIGNED (valtype);
4914   if (INTEGRAL_TYPE_P (valtype) || TREE_CODE (valtype) == OFFSET_TYPE)
4915     PROMOTE_MODE (mode, unsignedp, valtype);
4916   return gen_rtx_REG (mode, 0);
4917 }
4918 
4919 /* Returns the return address that is used by builtin_return_address.  */
4920 
4921 rtx
4922 arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx frame)
4923 {
4924   if (count != 0)
4925     return const0_rtx;
4926 
4927   return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
4928 }
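
/* Illustration (not part of the build): __builtin_return_address (0)
   reaches this hook with COUNT == 0 and yields the entry value of
   RETURN_ADDR_REGNUM; any other COUNT (a frame further up the stack) is
   unsupported here and folds to 0.  */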
4929 
4930 /* Nonzero if the constant value X is a legitimate general operand
4931    when generating PIC code.  It is given that flag_pic is on and
4932    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
4933 
4934 bool
4935 arc_legitimate_pic_operand_p (rtx x)
4936 {
4937   return !arc_raw_symbolic_reference_mentioned_p (x, true);
4938 }
4939 
4940 /* Determine if a given RTX is a valid constant.  We already know this
4941    satisfies CONSTANT_P.  */
4942 
4943 bool
4944 arc_legitimate_constant_p (machine_mode, rtx x)
4945 {
4946   if (!flag_pic)
4947     return true;
4948 
4949   switch (GET_CODE (x))
4950     {
4951     case CONST:
4952       x = XEXP (x, 0);
4953 
4954       if (GET_CODE (x) == PLUS)
4955 	{
4956 	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4957 	    return false;
4958 	  x = XEXP (x, 0);
4959 	}
4960 
4961       /* Only some unspecs are valid as "constants".  */
4962       if (GET_CODE (x) == UNSPEC)
4963 	switch (XINT (x, 1))
4964 	  {
4965 	  case ARC_UNSPEC_PLT:
4966 	  case ARC_UNSPEC_GOTOFF:
4967 	  case ARC_UNSPEC_GOT:
4968 	  case UNSPEC_PROF:
4969 	    return true;
4970 
4971 	  default:
4972 	    gcc_unreachable ();
4973 	  }
4974 
4975       /* We must have drilled down to a symbol.  */
4976       if (arc_raw_symbolic_reference_mentioned_p (x, false))
4977 	return false;
4978 
4979       /* Return true.  */
4980       break;
4981 
4982     case LABEL_REF:
4983     case SYMBOL_REF:
4984       return false;
4985 
4986     default:
4987       break;
4988     }
4989 
4990   /* Otherwise we handle everything else in the move patterns.  */
4991   return true;
4992 }
4993 
4994 static bool
4995 arc_legitimate_address_p (machine_mode mode, rtx x, bool strict)
4996 {
4997   if (RTX_OK_FOR_BASE_P (x, strict))
4998     return true;
4999   if (LEGITIMATE_OFFSET_ADDRESS_P (mode, x, TARGET_INDEXED_LOADS, strict))
5000     return true;
5001   if (LEGITIMATE_SCALED_ADDRESS_P (mode, x, strict))
5002     return true;
5003   if (LEGITIMATE_SMALL_DATA_ADDRESS_P (x))
5004     return true;
5005   if (GET_CODE (x) == CONST_INT && LARGE_INT (INTVAL (x)))
5006     return true;
5007   if ((GET_MODE_SIZE (mode) != 16)
5008       && (GET_CODE (x) == SYMBOL_REF
5009 	  || GET_CODE (x) == LABEL_REF
5010 	  || GET_CODE (x) == CONST))
5011     {
5012       if (!flag_pic || arc_legitimate_pic_addr_p (x))
5013 	return true;
5014     }
5015   if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC
5016        || GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC)
5017       && RTX_OK_FOR_BASE_P (XEXP (x, 0), strict))
5018     return true;
5019   /* We're restricted here by the `st' insn.  */
5020   if ((GET_CODE (x) == PRE_MODIFY || GET_CODE (x) == POST_MODIFY)
5021       && GET_CODE (XEXP ((x), 1)) == PLUS
5022       && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP (x, 1), 0))
5023       && LEGITIMATE_OFFSET_ADDRESS_P (QImode, XEXP (x, 1),
5024 				      TARGET_AUTO_MODIFY_REG, strict))
5025     return true;
5026   return false;
5027 }
5028 
5029 /* Return true iff ADDR (a legitimate address expression)
5030    has an effect that depends on the machine mode it is used for.  */
5031 
5032 static bool
5033 arc_mode_dependent_address_p (const_rtx addr, addr_space_t)
5034 {
5035   /* SYMBOL_REF is not mode dependent: it is either a small data reference,
5036      which is valid for loads and stores, or a limm offset, which is valid for
5037      loads.  */
5038   /* Scaled indices are scaled by the access mode; likewise for scaled
5039      offsets, which are needed for maximum offset stores.  */
5040   if (GET_CODE (addr) == PLUS
5041       && (GET_CODE (XEXP ((addr), 0)) == MULT
5042 	  || (CONST_INT_P (XEXP ((addr), 1))
5043 	      && !SMALL_INT (INTVAL (XEXP ((addr), 1))))))
5044     return true;
5045   return false;
5046 }
5047 
5048 /* Determine if it's legal to put X into the constant pool.  */
5049 
5050 static bool
5051 arc_cannot_force_const_mem (machine_mode mode, rtx x)
5052 {
5053   return !arc_legitimate_constant_p (mode, x);
5054 }
5055 
5056 
5057 /* Generic function to define a builtin.  */
5058 #define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
5059   do									\
5060     {									\
5061        if (MASK)							\
5062 	  add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, NULL_TREE); \
5063     }									\
5064   while (0)
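
/* Illustration (not part of the build): a use such as
       def_mbuiltin (TARGET_NORM, "__builtin_arc_norm", int_ftype_int,
		     ARC_BUILTIN_NORM);
   expands to an add_builtin_function call guarded by TARGET_NORM, so
   builtins whose MASK is off for the selected CPU are never
   registered.  */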
5065 
5066 
5067 static void
5068 arc_init_builtins (void)
5069 {
5070     tree endlink = void_list_node;
5071 
5072     tree void_ftype_void
5073 	= build_function_type (void_type_node,
5074 			       endlink);
5075 
5076     tree int_ftype_int
5077 	= build_function_type (integer_type_node,
5078 			   tree_cons (NULL_TREE, integer_type_node, endlink));
5079 
5080     tree pcvoid_type_node
5081 	= build_pointer_type (build_qualified_type (void_type_node, TYPE_QUAL_CONST));
5082     tree int_ftype_pcvoid_int
5083 	= build_function_type (integer_type_node,
5084 			   tree_cons (NULL_TREE, pcvoid_type_node,
5085 			       tree_cons (NULL_TREE, integer_type_node,
5086 				    endlink)));
5087 
5088     tree int_ftype_short_int
5089 	= build_function_type (integer_type_node,
5090 			       tree_cons (NULL_TREE, short_integer_type_node, endlink));
5091 
5092     tree void_ftype_int_int
5093 	= build_function_type (void_type_node,
5094 			       tree_cons (NULL_TREE, integer_type_node,
5095 					  tree_cons (NULL_TREE, integer_type_node, endlink)));
5096     tree void_ftype_usint_usint
5097 	= build_function_type (void_type_node,
5098 			       tree_cons (NULL_TREE, long_unsigned_type_node,
5099 					  tree_cons (NULL_TREE, long_unsigned_type_node, endlink)));
5100 
5101     tree int_ftype_int_int
5102 	= build_function_type (integer_type_node,
5103 			       tree_cons (NULL_TREE, integer_type_node,
5104 					  tree_cons (NULL_TREE, integer_type_node, endlink)));
5105 
5106     tree usint_ftype_usint
5107 	= build_function_type (long_unsigned_type_node,
5108 			   tree_cons (NULL_TREE, long_unsigned_type_node, endlink));
5109 
5110     tree void_ftype_usint
5111 	= build_function_type (void_type_node,
5112 			   tree_cons (NULL_TREE, long_unsigned_type_node, endlink));
5113 
5114     /* Add the builtins.  */
5115     def_mbuiltin (1, "__builtin_arc_nop", void_ftype_void, ARC_BUILTIN_NOP);
5116     def_mbuiltin (TARGET_NORM, "__builtin_arc_norm", int_ftype_int, ARC_BUILTIN_NORM);
5117     def_mbuiltin (TARGET_NORM, "__builtin_arc_normw", int_ftype_short_int, ARC_BUILTIN_NORMW);
5118     def_mbuiltin (TARGET_SWAP, "__builtin_arc_swap", int_ftype_int, ARC_BUILTIN_SWAP);
5119     def_mbuiltin (TARGET_MUL64_SET, "__builtin_arc_mul64", void_ftype_int_int, ARC_BUILTIN_MUL64);
5120     def_mbuiltin (TARGET_MUL64_SET, "__builtin_arc_mulu64", void_ftype_usint_usint, ARC_BUILTIN_MULU64);
5121     def_mbuiltin (1, "__builtin_arc_rtie", void_ftype_void, ARC_BUILTIN_RTIE);
5122     def_mbuiltin (TARGET_ARC700, "__builtin_arc_sync", void_ftype_void, ARC_BUILTIN_SYNC);
5123     def_mbuiltin (TARGET_EA_SET, "__builtin_arc_divaw", int_ftype_int_int, ARC_BUILTIN_DIVAW);
5124     def_mbuiltin (1, "__builtin_arc_brk", void_ftype_void, ARC_BUILTIN_BRK);
5125     def_mbuiltin (1, "__builtin_arc_flag", void_ftype_usint, ARC_BUILTIN_FLAG);
5126     def_mbuiltin (1, "__builtin_arc_sleep", void_ftype_usint, ARC_BUILTIN_SLEEP);
5127     def_mbuiltin (1, "__builtin_arc_swi", void_ftype_void, ARC_BUILTIN_SWI);
5128     def_mbuiltin (1, "__builtin_arc_core_read", usint_ftype_usint, ARC_BUILTIN_CORE_READ);
5129     def_mbuiltin (1, "__builtin_arc_core_write", void_ftype_usint_usint, ARC_BUILTIN_CORE_WRITE);
5130     def_mbuiltin (1, "__builtin_arc_lr", usint_ftype_usint, ARC_BUILTIN_LR);
5131     def_mbuiltin (1, "__builtin_arc_sr", void_ftype_usint_usint, ARC_BUILTIN_SR);
5132     def_mbuiltin (TARGET_ARC700, "__builtin_arc_trap_s", void_ftype_usint, ARC_BUILTIN_TRAP_S);
5133     def_mbuiltin (TARGET_ARC700, "__builtin_arc_unimp_s", void_ftype_void, ARC_BUILTIN_UNIMP_S);
5134     def_mbuiltin (1, "__builtin_arc_aligned", int_ftype_pcvoid_int, ARC_BUILTIN_ALIGNED);
5135 
5136     if (TARGET_SIMD_SET)
5137       arc_init_simd_builtins ();
5138 }
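
/* Illustration (not part of the build): once registered above, these are
   available as ordinary builtins at the C level, e.g.
       int lead = __builtin_arc_norm (x);
       __builtin_arc_sleep (10);
   Entries with a MASK other than 1, such as __builtin_arc_divaw, exist
   only when the corresponding target flag was selected.  */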
5139 
5140 static rtx arc_expand_simd_builtin (tree, rtx, rtx, machine_mode, int);
5141 
5142 /* Expand an expression EXP that calls a built-in function,
5143    with result going to TARGET if that's convenient
5144    (and in mode MODE if that's convenient).
5145    SUBTARGET may be used as the target for computing one of EXP's operands.
5146    IGNORE is nonzero if the value is to be ignored.  */
5147 
5148 static rtx
5149 arc_expand_builtin (tree exp,
5150 		    rtx target,
5151 		    rtx subtarget,
5152 		    machine_mode mode,
5153 		    int ignore)
5154 {
5155   tree              fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
5156   tree              arg0;
5157   tree              arg1;
5158   rtx               op0;
5159   rtx               op1;
5160   int               fcode = DECL_FUNCTION_CODE (fndecl);
5161   int               icode;
5162   machine_mode mode0;
5163   machine_mode mode1;
5164 
5165   if (fcode > ARC_SIMD_BUILTIN_BEGIN && fcode < ARC_SIMD_BUILTIN_END)
5166     return arc_expand_simd_builtin (exp, target, subtarget, mode, ignore);
5167 
5168   switch (fcode)
5169     {
5170     case ARC_BUILTIN_NOP:
5171       emit_insn (gen_nop ());
5172       return NULL_RTX;
5173 
5174     case ARC_BUILTIN_NORM:
5175       icode = CODE_FOR_clrsbsi2;
5176       arg0 = CALL_EXPR_ARG (exp, 0);
5177       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5178       mode0 = insn_data[icode].operand[1].mode;
5179       target = gen_reg_rtx (SImode);
5180 
5181       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
5182 	op0 = copy_to_mode_reg (mode0, op0);
5183 
5184       emit_insn (gen_clrsbsi2 (target, op0));
5185       return target;
5186 
5187     case ARC_BUILTIN_NORMW:
5188 
5189       /* FIXME : This should all be HImode, not SImode.  */
5190       icode = CODE_FOR_normw;
5191       arg0 = CALL_EXPR_ARG (exp, 0);
5192       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5193       mode0 = insn_data[icode].operand[1].mode;
5194       target = gen_reg_rtx (SImode);
5195 
5196       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
5197 	op0 = copy_to_mode_reg (mode0, convert_to_mode (mode0, op0,0));
5198 
5199       emit_insn (gen_normw (target, op0));
5200       return target;
5201 
5202     case ARC_BUILTIN_MUL64:
5203       icode = CODE_FOR_mul64;
5204       arg0 = CALL_EXPR_ARG (exp, 0);
5205       arg1 = CALL_EXPR_ARG (exp, 1);
5206       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5207       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5208 
5209       mode0 = insn_data[icode].operand[0].mode;
5210       mode1 = insn_data[icode].operand[1].mode;
5211 
5212       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
5213 	op0 = copy_to_mode_reg (mode0, op0);
5214 
5215       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
5216 	op1 = copy_to_mode_reg (mode1, op1);
5217 
5218       emit_insn (gen_mul64 (op0, op1));
5219       return NULL_RTX;
5220 
5221     case ARC_BUILTIN_MULU64:
5222       icode = CODE_FOR_mulu64;
5223       arg0 = CALL_EXPR_ARG (exp, 0);
5224       arg1 = CALL_EXPR_ARG (exp, 1);
5225       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5226       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5227 
5228       mode0 = insn_data[icode].operand[0].mode;
5229       mode1 = insn_data[icode].operand[1].mode;
5230 
5231       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
5232 	op0 = copy_to_mode_reg (mode0, op0);
5233 
5234       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
5235 	op1 = copy_to_mode_reg (mode1, op1);
5236 
5237       emit_insn (gen_mulu64 (op0, op1));
5238       return NULL_RTX;
5239 
5240     case ARC_BUILTIN_RTIE:
5241       icode = CODE_FOR_rtie;
5242       emit_insn (gen_rtie (const1_rtx));
5243       return NULL_RTX;
5244 
5245     case ARC_BUILTIN_SYNC:
5246       icode = CODE_FOR_sync;
5247       emit_insn (gen_sync (const1_rtx));
5248       return NULL_RTX;
5249 
5250     case ARC_BUILTIN_SWAP:
5251       icode = CODE_FOR_swap;
5252       arg0 = CALL_EXPR_ARG (exp, 0);
5253       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5254       mode0 = insn_data[icode].operand[1].mode;
5255       target = gen_reg_rtx (SImode);
5256 
5257       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
5258 	op0 = copy_to_mode_reg (mode0, op0);
5259 
5260       emit_insn (gen_swap (target, op0));
5261       return target;
5262 
5263     case ARC_BUILTIN_DIVAW:
5264       icode = CODE_FOR_divaw;
5265       arg0 = CALL_EXPR_ARG (exp, 0);
5266       arg1 = CALL_EXPR_ARG (exp, 1);
5267 
5268       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5269       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5270       target = gen_reg_rtx (SImode);
5271 
5272       mode0 = insn_data[icode].operand[0].mode;
5273       mode1 = insn_data[icode].operand[1].mode;
5274 
5275       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
5276 	op0 = copy_to_mode_reg (mode0, op0);
5277 
5278       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
5279 	op1 = copy_to_mode_reg (mode1, op1);
5280 
5281       emit_insn (gen_divaw (target, op0, op1));
5282       return target;
5283 
5284     case ARC_BUILTIN_BRK:
5285       icode = CODE_FOR_brk;
5286       emit_insn (gen_brk (const1_rtx));
5287       return NULL_RTX;
5288 
5289     case ARC_BUILTIN_SLEEP:
5290       icode = CODE_FOR_sleep;
5291       arg0 = CALL_EXPR_ARG (exp, 0);
5292 
5293       fold (arg0);
5294 
5295       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5296       mode0 = insn_data[icode].operand[1].mode;
5297 
5298       emit_insn (gen_sleep (op0));
5299       return NULL_RTX;
5300 
5301     case ARC_BUILTIN_SWI:
5302       icode = CODE_FOR_swi;
5303       emit_insn (gen_swi (const1_rtx));
5304       return NULL_RTX;
5305 
5306     case ARC_BUILTIN_FLAG:
5307       icode = CODE_FOR_flag;
5308       arg0 = CALL_EXPR_ARG (exp, 0);
5309       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5310       mode0 = insn_data[icode].operand[0].mode;
5311 
5312       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
5313 	op0 = copy_to_mode_reg (mode0, op0);
5314 
5315       emit_insn (gen_flag (op0));
5316       return NULL_RTX;
5317 
5318     case ARC_BUILTIN_CORE_READ:
5319       icode = CODE_FOR_core_read;
5320       arg0 = CALL_EXPR_ARG (exp, 0);
5321       target = gen_reg_rtx (SImode);
5322 
5323       fold (arg0);
5324 
5325       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5326       mode0 = insn_data[icode].operand[1].mode;
5327 
5328       emit_insn (gen_core_read (target, op0));
5329       return target;
5330 
5331     case ARC_BUILTIN_CORE_WRITE:
5332       icode = CODE_FOR_core_write;
5333       arg0 = CALL_EXPR_ARG (exp, 0);
5334       arg1 = CALL_EXPR_ARG (exp, 1);
5335 
5336       fold (arg1);
5337 
5338       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5339       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5340 
5341       mode0 = insn_data[icode].operand[0].mode;
5342       mode1 = insn_data[icode].operand[1].mode;
5343 
5344       emit_insn (gen_core_write (op0, op1));
5345       return NULL_RTX;
5346 
5347     case ARC_BUILTIN_LR:
5348       icode = CODE_FOR_lr;
5349       arg0 = CALL_EXPR_ARG (exp, 0);
5350       target = gen_reg_rtx (SImode);
5351 
5352       fold (arg0);
5353 
5354       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5355       mode0 = insn_data[icode].operand[1].mode;
5356 
5357       emit_insn (gen_lr (target, op0));
5358       return target;
5359 
5360     case ARC_BUILTIN_SR:
5361       icode = CODE_FOR_sr;
5362       arg0 = CALL_EXPR_ARG (exp, 0);
5363       arg1 = CALL_EXPR_ARG (exp, 1);
5364 
5365       fold (arg1);
5366 
5367       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5368       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5369 
5370       mode0 = insn_data[icode].operand[0].mode;
5371       mode1 = insn_data[icode].operand[1].mode;
5372 
5373       emit_insn (gen_sr (op0, op1));
5374       return NULL_RTX;
5375 
5376     case ARC_BUILTIN_TRAP_S:
5377       icode = CODE_FOR_trap_s;
5378       arg0 = CALL_EXPR_ARG (exp, 0);
5379 
5380       fold (arg0);
5381 
5382       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5383       mode0 = insn_data[icode].operand[1].mode;
5384 
5385       /* We don't give an error for non-constant values here because
5386 	 we still want to allow things to be fixed up by later inlining /
5387 	 constant folding / dead code elimination.  */
5388       if  (CONST_INT_P (op0) && !satisfies_constraint_L (op0))
5389 	{
5390 	  /* Keep this message in sync with the one in arc.md:trap_s,
5391 	     because *.md files don't get scanned by exgettext.  */
5392 	  error ("operand to trap_s should be an unsigned 6-bit value");
5393 	}
5394       emit_insn (gen_trap_s (op0));
5395       return NULL_RTX;
5396 
5397     case ARC_BUILTIN_UNIMP_S:
5398       icode = CODE_FOR_unimp_s;
5399       emit_insn (gen_unimp_s (const1_rtx));
5400       return NULL_RTX;
5401 
5402     case ARC_BUILTIN_ALIGNED:
5403       /* __builtin_arc_aligned (void* val, int alignval) */
5404       arg0 = CALL_EXPR_ARG (exp, 0);
5405       arg1 = CALL_EXPR_ARG (exp, 1);
5406       fold (arg1);
5407       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5408       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5409       target = gen_reg_rtx (SImode);
5410 
5411       if (!CONST_INT_P (op1))
5412 	{
5413 	  /* If we can't fold the alignment to a constant integer
5414 	     whilst optimizing, this is probably a user error.  */
5415 	  if (optimize)
5416 	    warning (0, "__builtin_arc_aligned with non-constant alignment");
5417 	}
5418       else
5419 	{
5420 	  HOST_WIDE_INT alignTest = INTVAL (op1);
5421 	  /* Check alignTest is positive, and a power of two.  */
5422 	  if (alignTest <= 0 || alignTest != (alignTest & -alignTest))
5423 	    {
5424 	      error ("invalid alignment value for __builtin_arc_aligned");
5425 	      return NULL_RTX;
5426 	    }
5427 
5428 	  if (CONST_INT_P (op0))
5429 	    {
5430 	      HOST_WIDE_INT pnt = INTVAL (op0);
5431 
5432 	      if ((pnt & (alignTest - 1)) == 0)
5433 		return const1_rtx;
5434 	    }
5435 	  else
5436 	    {
5437 	      unsigned  align = get_pointer_alignment (arg0);
5438 	      unsigned  numBits = alignTest * BITS_PER_UNIT;
5439 
5440 	      if (align && align >= numBits)
5441 		return const1_rtx;
5442 	      /* Another attempt to ascertain alignment.  Check the type
5443 		 we are pointing to.  */
5444 	      if (POINTER_TYPE_P (TREE_TYPE (arg0))
5445 		  && TYPE_ALIGN (TREE_TYPE (TREE_TYPE (arg0))) >= numBits)
5446 		return const1_rtx;
5447 	    }
5448 	}
5449 
5450       /* Default to false.  */
5451       return const0_rtx;
5452 
5453     default:
5454       break;
5455     }
5456 
5457   /* @@@ Should really do something sensible here.  */
5458   return NULL_RTX;
5459 }
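
/* Illustration (not part of the build) of the ARC_BUILTIN_ALIGNED case
   above, with arbitrary example values:
       __builtin_arc_aligned (p, 3)   -> error, 3 is not a power of two;
       __builtin_arc_aligned (buf, 4) -> const1_rtx when buf is provably
	  4-byte aligned, const0_rtx otherwise.
   A non-constant alignment only draws a warning when optimizing, since
   later folding may still produce a constant.  */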
5460 
5461 /* Return true if operands[opno] is a valid compile-time constant to be
5462    used as a register number in the builtin code.  Otherwise flag an
5463    error and return false.  */
5464 
5465 bool
5466 check_if_valid_regno_const (rtx *operands, int opno)
5467 {
5469   switch (GET_CODE (operands[opno]))
5470     {
5471     case SYMBOL_REF :
5472     case CONST :
5473     case CONST_INT :
5474       return true;
5475     default:
5476       error ("register number must be a compile-time constant; try giving higher optimization levels");
5477       break;
5478     }
5479   return false;
5480 }
5481 
5482 /* Check whether, after all the constant folding, the operand to
5483    __builtin_arc_sleep is an unsigned 6-bit int.  If not, flag an error.  */
5484 
5485 bool
5486 check_if_valid_sleep_operand (rtx *operands, int opno)
5487 {
5488   switch (GET_CODE (operands[opno]))
5489     {
5490     case CONST :
5491     case CONST_INT :
5492       if (UNSIGNED_INT6 (INTVAL (operands[opno])))
5493 	return true;
5494       /* FALLTHRU */
5495     default:
5496       fatal_error (input_location,
5497 		   "operand for sleep instruction must be an unsigned 6 bit compile-time constant");
5498       break;
5498     }
5499   return false;
5500 }
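
/* Illustration (not part of the build): __builtin_arc_sleep (10) passes
   this check, while an operand of 100, or one that never folds to a
   constant, hits the fatal_error above (the u6 range is 0..63).  */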
5501 
5502 /* Return true if it is ok to make a tail-call to DECL.  */
5503 
5504 static bool
5505 arc_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
5506 			     tree exp ATTRIBUTE_UNUSED)
5507 {
5508   /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
5509   if (ARC_INTERRUPT_P (arc_compute_function_type (cfun)))
5510     return false;
5511 
5512   /* Everything else is ok.  */
5513   return true;
5514 }
5515 
5516 /* Output code to add DELTA to the first argument, and then jump
5517    to FUNCTION.  Used for C++ multiple inheritance.  */
5518 
5519 static void
5520 arc_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
5521 		     HOST_WIDE_INT delta,
5522 		     HOST_WIDE_INT vcall_offset,
5523 		     tree function)
5524 {
5525   int mi_delta = delta;
5526   const char *const mi_op = mi_delta < 0 ? "sub" : "add";
5527   int shift = 0;
5528   int this_regno
5529     = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0;
5530   rtx fnaddr;
5531 
5532   if (mi_delta < 0)
5533     mi_delta = - mi_delta;
5534 
5535   /* Add DELTA.  When possible use a plain add, otherwise load it into
5536      a register first.  */
5537 
5538   while (mi_delta != 0)
5539     {
5540       if ((mi_delta & (3 << shift)) == 0)
5541 	shift += 2;
5542       else
5543 	{
5544 	  asm_fprintf (file, "\t%s\t%s, %s, %d\n",
5545 		       mi_op, reg_names[this_regno], reg_names[this_regno],
5546 		       mi_delta & (0xff << shift));
5547 	  mi_delta &= ~(0xff << shift);
5548 	  shift += 8;
5549 	}
5550     }
5551 
5552   /* If needed, add *(*THIS + VCALL_OFFSET) to THIS.  */
5553   if (vcall_offset != 0)
5554     {
5555       /* ld  r12,[this]           --> temp = *this
5556 	 add r12,r12,vcall_offset --> temp = *this + vcall_offset
5557 	 ld  r12,[r12]            --> temp = *(*this + vcall_offset)
5558 	 add this,this,r12        --> this += *(*this + vcall_offset) */
5559       asm_fprintf (file, "\tld\t%s, [%s]\n",
5560 		   ARC_TEMP_SCRATCH_REG, reg_names[this_regno]);
5561       asm_fprintf (file, "\tadd\t%s, %s, " HOST_WIDE_INT_PRINT_DEC "\n",
5562 		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG, vcall_offset);
5563       asm_fprintf (file, "\tld\t%s, [%s]\n",
5564 		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG);
5565       asm_fprintf (file, "\tadd\t%s, %s, %s\n", reg_names[this_regno],
5566 		   reg_names[this_regno], ARC_TEMP_SCRATCH_REG);
5567     }
5568 
5569   fnaddr = XEXP (DECL_RTL (function), 0);
5570 
5571   if (arc_is_longcall_p (fnaddr))
5572     fputs ("\tj\t", file);
5573   else
5574     fputs ("\tb\t", file);
5575   assemble_name (file, XSTR (fnaddr, 0));
5576   fputc ('\n', file);
5577 }
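
/* Illustration (not part of the build): for DELTA == 4 and
   VCALL_OFFSET == 0 the loop above emits just
       add r0, r0, 4
       b   function
   (r1 instead of r0 when the return value is passed by reference, and
   j instead of b when the target needs a long call).  */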
5578 
5579 /* Return true if a 32 bit "long_call" should be generated for a call
5580    to this SYM_REF.  We generate a long_call if the function:
5581 
5582         a.  has an __attribute__ ((long_call))
5583      or b.  the -mlong-calls command line switch has been specified
5584 
5585    However, we do not generate a long call if the function has an
5586    __attribute__ ((short_call)) or __attribute__ ((medium_call)).
5587 
5588    This function will be called by C fragments contained in the machine
5589    description file.  */
5590 
5591 bool
5592 arc_is_longcall_p (rtx sym_ref)
5593 {
5594   if (GET_CODE (sym_ref) != SYMBOL_REF)
5595     return false;
5596 
5597   return (SYMBOL_REF_LONG_CALL_P (sym_ref)
5598 	  || (TARGET_LONG_CALLS_SET
5599 	      && !SYMBOL_REF_SHORT_CALL_P (sym_ref)
5600 	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
5601 
5602 }
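
/* Illustration (not part of the build): given
       void f (void) __attribute__ ((long_call));
       void g (void) __attribute__ ((short_call));
   arc_is_longcall_p is true for calls to f; with -mlong-calls it is also
   true for unadorned functions, but never for g.  */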
5603 
5604 /* Likewise for short calls.  */
5605 
5606 bool
5607 arc_is_shortcall_p (rtx sym_ref)
5608 {
5609   if (GET_CODE (sym_ref) != SYMBOL_REF)
5610     return false;
5611 
5612   return (SYMBOL_REF_SHORT_CALL_P (sym_ref)
5613 	  || (!TARGET_LONG_CALLS_SET && !TARGET_MEDIUM_CALLS
5614 	      && !SYMBOL_REF_LONG_CALL_P (sym_ref)
5615 	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
5616 
5617 }
5618 
5619 /* Emit profiling code for calling CALLEE.  Return true if a special
5620    call pattern needs to be generated.  */
5621 
5622 bool
5623 arc_profile_call (rtx callee)
5624 {
5625   rtx from = XEXP (DECL_RTL (current_function_decl), 0);
5626 
5627   if (TARGET_UCB_MCOUNT)
5628     /* Profiling is done by instrumenting the callee.  */
5629     return false;
5630 
5631   if (CONSTANT_P (callee))
5632     {
5633       rtx count_ptr
5634 	= gen_rtx_CONST (Pmode,
5635 			 gen_rtx_UNSPEC (Pmode,
5636 					 gen_rtvec (3, from, callee,
5637 						    CONST0_RTX (Pmode)),
5638 					 UNSPEC_PROF));
5639       rtx counter = gen_rtx_MEM (SImode, count_ptr);
5640       /* ??? The increment would better be done atomically, but as there is
5641 	 no proper hardware support, that would be too expensive.  */
5642       emit_move_insn (counter,
5643 		      force_reg (SImode, plus_constant (SImode, counter, 1)));
5644       return false;
5645     }
5646   else
5647     {
5648       rtx count_list_ptr
5649 	= gen_rtx_CONST (Pmode,
5650 			 gen_rtx_UNSPEC (Pmode,
5651 					 gen_rtvec (3, from, CONST0_RTX (Pmode),
5652 						    CONST0_RTX (Pmode)),
5653 					 UNSPEC_PROF));
5654       emit_move_insn (gen_rtx_REG (Pmode, 8), count_list_ptr);
5655       emit_move_insn (gen_rtx_REG (Pmode, 9), callee);
5656       return true;
5657     }
5658 }
5659 
5660 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
5661 
5662 static bool
5663 arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5664 {
5665   if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type))
5666     return true;
5667   else
5668     {
5669       HOST_WIDE_INT size = int_size_in_bytes (type);
5670       return (size == -1 || size > 8);
5671     }
5672 }
5673 
5674 
5675 /* This was in rtlanal.c, and can go in there when we decide we want
5676    to submit the change for inclusion in the GCC tree.  */
5677 /* Like note_stores, but allow the callback to have side effects on the rtl
5678    (like the note_stores of yore):
5679    Call FUN on each register or MEM that is stored into or clobbered by X.
5680    (X would be the pattern of an insn).  DATA is an arbitrary pointer,
5681    ignored by note_stores, but passed to FUN.
5682    FUN may alter parts of the RTL.
5683 
5684    FUN receives three arguments:
5685    1. the REG, MEM, CC0 or PC being stored in or clobbered,
5686    2. the SET or CLOBBER rtx that does the store,
5687    3. the pointer DATA provided to note_stores.
5688 
5689   If the item being stored in or clobbered is a SUBREG of a hard register,
5690   the SUBREG will be passed.  */
5691 
5692 /* For now.  */ static
5693 void
5694 walk_stores (rtx x, void (*fun) (rtx, rtx, void *), void *data)
5695 {
5696   int i;
5697 
5698   if (GET_CODE (x) == COND_EXEC)
5699     x = COND_EXEC_CODE (x);
5700 
5701   if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
5702     {
5703       rtx dest = SET_DEST (x);
5704 
5705       while ((GET_CODE (dest) == SUBREG
5706 	      && (!REG_P (SUBREG_REG (dest))
5707 		  || REGNO (SUBREG_REG (dest)) >= FIRST_PSEUDO_REGISTER))
5708 	     || GET_CODE (dest) == ZERO_EXTRACT
5709 	     || GET_CODE (dest) == STRICT_LOW_PART)
5710 	dest = XEXP (dest, 0);
5711 
5712       /* If we have a PARALLEL, SET_DEST is a list of EXPR_LIST expressions,
5713 	 each of whose first operand is a register.  */
5714       if (GET_CODE (dest) == PARALLEL)
5715 	{
5716 	  for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
5717 	    if (XEXP (XVECEXP (dest, 0, i), 0) != 0)
5718 	      (*fun) (XEXP (XVECEXP (dest, 0, i), 0), x, data);
5719 	}
5720       else
5721 	(*fun) (dest, x, data);
5722     }
5723 
5724   else if (GET_CODE (x) == PARALLEL)
5725     for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
5726       walk_stores (XVECEXP (x, 0, i), fun, data);
5727 }
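
/* Illustration (not part of the build): a minimal walk_stores callback,
   with made-up names; arc_reorg below uses the same interface with
   write_profile_sections as FUN.

       static void
       count_reg_stores (rtx dest, rtx pat ATTRIBUTE_UNUSED, void *data)
       {
	 if (REG_P (dest))
	   ++*(int *) data;
       }

       int n = 0;
       walk_stores (PATTERN (insn), count_reg_stores, &n);  */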
5728 
5729 static bool
5730 arc_pass_by_reference (cumulative_args_t ca_v ATTRIBUTE_UNUSED,
5731 		       machine_mode mode ATTRIBUTE_UNUSED,
5732 		       const_tree type,
5733 		       bool named ATTRIBUTE_UNUSED)
5734 {
5735   return (type != 0
5736 	  && (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5737 	      || TREE_ADDRESSABLE (type)));
5738 }
5739 
5740 /* Implement TARGET_CAN_USE_DOLOOP_P.  */
5741 
5742 static bool
5743 arc_can_use_doloop_p (const widest_int &iterations, const widest_int &,
5744 		      unsigned int loop_depth, bool entered_at_top)
5745 {
5746   if (loop_depth > 1)
5747     return false;
5748   /* Setting up the loop with two sr instructions costs 6 cycles.  */
5749   if (TARGET_ARC700
5750       && !entered_at_top
5751       && wi::gtu_p (iterations, 0)
5752       && wi::leu_p (iterations, flag_pic ? 6 : 3))
5753     return false;
5754   return true;
5755 }
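
/* Illustration (not part of the build): a counted loop nested inside
   another counted loop is rejected (loop_depth > 1), and on ARC700 a
   bottom-entered loop known to iterate at most 3 times (6 with -fpic)
   is cheaper as an ordinary branch than as a zero-overhead loop, given
   the 6-cycle sr setup noted above.  */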
5756 
5757 /* NULL if INSN insn is valid within a low-overhead loop.
5758    Otherwise return why doloop cannot be applied.  */
5759 
5760 static const char *
5761 arc_invalid_within_doloop (const rtx_insn *insn)
5762 {
5763   if (CALL_P (insn))
5764     return "Function call in the loop.";
5765   return NULL;
5766 }
5767 
5768 static int arc_reorg_in_progress = 0;
5769 
5770 /* ARC's machine specific reorg function.  */
5771 
5772 static void
5773 arc_reorg (void)
5774 {
5775   rtx_insn *insn;
5776   rtx pattern;
5777   rtx pc_target;
5778   long offset;
5779   int changed;
5780 
5781   cfun->machine->arc_reorg_started = 1;
5782   arc_reorg_in_progress = 1;
5783 
5784   /* Emit special sections for profiling.  */
5785   if (crtl->profile)
5786     {
5787       section *save_text_section;
5788       rtx_insn *insn;
5789       int size = get_max_uid () >> 4;
5790       htab_t htab = htab_create (size, unspec_prof_hash, unspec_prof_htab_eq,
5791 				 NULL);
5792 
5793       save_text_section = in_section;
5794       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5795 	if (NONJUMP_INSN_P (insn))
5796 	  walk_stores (PATTERN (insn), write_profile_sections, htab);
5797       if (htab_elements (htab))
5798 	in_section = 0;
5799       switch_to_section (save_text_section);
5800       htab_delete (htab);
5801     }
5802 
5803   /* Link up loop ends with their loop start.  */
5804   {
5805     for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5806       if (GET_CODE (insn) == JUMP_INSN
5807 	  && recog_memoized (insn) == CODE_FOR_doloop_end_i)
5808 	{
5809 	  rtx_insn *top_label
5810 	    = as_a <rtx_insn *> (XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0));
5811 	  rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label));
5812 	  rtx_insn *lp, *prev = prev_nonnote_insn (top_label);
5813 	  rtx_insn *lp_simple = NULL;
5814 	  rtx_insn *next = NULL;
5815 	  rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0);
5816 	  HOST_WIDE_INT loop_end_id
5817 	    = -INTVAL (XEXP (XVECEXP (PATTERN (insn), 0, 4), 0));
5818 	  int seen_label = 0;
5819 
5820 	  for (lp = prev;
5821 	       (lp && NONJUMP_INSN_P (lp)
5822 		&& recog_memoized (lp) != CODE_FOR_doloop_begin_i);
5823 	       lp = prev_nonnote_insn (lp))
5824 	    ;
5825 	  if (!lp || !NONJUMP_INSN_P (lp)
5826 	      || dead_or_set_regno_p (lp, LP_COUNT))
5827 	    {
5828 	      for (prev = next = insn, lp = NULL ; prev || next;)
5829 		{
5830 		  if (prev)
5831 		    {
5832 		      if (NONJUMP_INSN_P (prev)
5833 			  && recog_memoized (prev) == CODE_FOR_doloop_begin_i
5834 			  && (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0))
5835 			      == loop_end_id))
5836 			{
5837 			  lp = prev;
5838 			  break;
5839 			}
5840 		      else if (LABEL_P (prev))
5841 			seen_label = 1;
5842 		      prev = prev_nonnote_insn (prev);
5843 		    }
5844 		  if (next)
5845 		    {
5846 		      if (NONJUMP_INSN_P (next)
5847 			  && recog_memoized (next) == CODE_FOR_doloop_begin_i
5848 			  && (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0))
5849 			      == loop_end_id))
5850 			{
5851 			  lp = next;
5852 			  break;
5853 			}
5854 		      next = next_nonnote_insn (next);
5855 		    }
5856 		}
5857 	      prev = NULL;
5858 	    }
5859 	  else
5860 	    lp_simple = lp;
5861 	  if (lp && !dead_or_set_regno_p (lp, LP_COUNT))
5862 	    {
5863 	      rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0, 3), 0);
5864 	      if (INTVAL (XEXP (XVECEXP (PATTERN (lp), 0, 4), 0)))
5865 		/* The loop end insn has been duplicated.  That can happen
5866 		   when there is a conditional block at the very end of
5867 		   the loop.  */
5868 		goto failure;
5869 	      /* If register allocation failed to allocate to the right
5870 		 register, there is no point in teaching reload to
5871 		 fix this up with reloads, as that would cost more
5872 		 than using an ordinary core register with the
5873 		 doloop_fallback pattern.  */
5874 	      if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt))
5875 	      /* Likewise, if the loop setup is evidently inside the loop,
5876 		 we lose.  */
5877 		  || (!lp_simple && lp != next && !seen_label))
5878 		{
5879 		  remove_insn (lp);
5880 		  goto failure;
5881 		}
5882 	      /* It is common that the optimizers copy the loop count from
5883 		 another register, and doloop_begin_i is stuck with the
5884 		 source of the move.  Making doloop_begin_i only accept "l"
5885 		 is nonsensical, as this then makes reload evict the pseudo
5886 		 used for the loop end.  The underlying cause is that the
5887 		 optimizers don't understand that the register allocation for
5888 		 doloop_begin_i should be treated as part of the loop.
5889 		 Try to work around this problem by verifying the previous
5890 		 move exists.  */
5891 	      if (true_regnum (begin_cnt) != LP_COUNT)
5892 		{
5893 		  rtx_insn *mov;
5894 		  rtx set, note;
5895 
5896 		  for (mov = prev_nonnote_insn (lp); mov;
5897 		       mov = prev_nonnote_insn (mov))
5898 		    {
5899 		      if (!NONJUMP_INSN_P (mov))
5900 			mov = 0;
5901 		      else if ((set = single_set (mov))
5902 			  && rtx_equal_p (SET_SRC (set), begin_cnt)
5903 			  && rtx_equal_p (SET_DEST (set), op0))
5904 			break;
5905 		    }
5906 		  if (mov)
5907 		    {
5908 		      XEXP (XVECEXP (PATTERN (lp), 0, 3), 0) = op0;
5909 		      note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt));
5910 		      if (note)
5911 			remove_note (lp, note);
5912 		    }
5913 		  else
5914 		    {
5915 		      remove_insn (lp);
5916 		      goto failure;
5917 		    }
5918 		}
5919 	      XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num;
5920 	      XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num;
5921 	      if (next == lp)
5922 		XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx;
5923 	      else if (!lp_simple)
5924 		XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx;
5925 	      else if (prev != lp)
5926 		{
5927 		  remove_insn (lp);
5928 		  add_insn_after (lp, prev, NULL);
5929 		}
5930 	      if (!lp_simple)
5931 		{
5932 		  XEXP (XVECEXP (PATTERN (lp), 0, 7), 0)
5933 		    = gen_rtx_LABEL_REF (Pmode, top_label);
5934 		  add_reg_note (lp, REG_LABEL_OPERAND, top_label);
5935 		  LABEL_NUSES (top_label)++;
5936 		}
5937 	      /* We can avoid tedious loop start / end setting for empty loops
5938 		 by merely setting the loop count to its final value.  */
5939 	      if (next_active_insn (top_label) == insn)
5940 		{
5941 		  rtx lc_set
5942 		    = gen_rtx_SET (VOIDmode,
5943 				   XEXP (XVECEXP (PATTERN (lp), 0, 3), 0),
5944 				   const0_rtx);
5945 
5946 		  rtx_insn *lc_set_insn = emit_insn_before (lc_set, insn);
5947 		  delete_insn (lp);
5948 		  delete_insn (insn);
5949 		  insn = lc_set_insn;
5950 		}
5951 	      /* If the loop is non-empty with zero length, we can't make it
5952 		 a zero-overhead loop.  That can happen for empty asms.  */
5953 	      else
5954 		{
5955 		  rtx_insn *scan;
5956 
5957 		  for (scan = top_label;
5958 		       (scan && scan != insn
5959 			&& (!NONJUMP_INSN_P (scan) || !get_attr_length (scan)));
5960 		       scan = NEXT_INSN (scan));
5961 		  if (scan == insn)
5962 		    {
5963 		      remove_insn (lp);
5964 		      goto failure;
5965 		    }
5966 		}
5967 	    }
5968 	  else
5969 	    {
5970 	      /* Sometimes the loop optimizer makes a complete hash of the
5971 		 loop.  If it were only that the loop is not entered at the
5972 		 top, we could fix this up by setting LP_START with SR.
5973 		 However, if we can't find the loop begin where it should be,
5974 		 chances are that it does not even dominate the loop, but is
5975 		 inside the loop instead.  Using SR there would kill
5976 		 performance.
5977 		 We use the doloop_fallback pattern here, which executes
5978 		 in two cycles on the ARC700 when predicted correctly.  */
5979 	    failure:
5980 	      if (!REG_P (op0))
5981 		{
5982 		  rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0);
5983 
5984 		  emit_insn_before (gen_move_insn (op3, op0), insn);
5985 		  PATTERN (insn)
5986 		    = gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0);
5987 		}
5988 	      else
5989 		XVEC (PATTERN (insn), 0)
5990 		  = gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0),
5991 			       XVECEXP (PATTERN (insn), 0, 1));
5992 	      INSN_CODE (insn) = -1;
5993 	    }
5994 	}
5995     }
5996 
5997 /* FIXME: should anticipate ccfsm action, generate special patterns for
5998    to-be-deleted branches that have no delay slot and have at least the
5999    length of the size increase forced on other insns that are conditionalized.
6000    This can also have an insn_list inside that enumerates insns which are
6001    not actually conditionalized because the destinations are dead in the
6002    not-execute case.
6003    Could also tag branches that we want to be unaligned if they get no delay
6004    slot, or even ones that we don't want to do delay slot scheduling for
6005    because we can unalign them.
6006 
6007    However, there are cases when conditional execution is only possible after
6008    delay slot scheduling:
6009 
6010    - If a delay slot is filled with a nocond/set insn from above, the previous
6011      basic block can become eligible for conditional execution.
6012    - If a delay slot is filled with a nocond insn from the fall-through path,
6013      the branch with that delay slot can become eligible for conditional
6014      execution (however, with the same sort of data flow analysis that dbr
6015      does, we could have figured out before that we don't need to
6016      conditionalize this insn.)
6017    - If a delay slot insn is filled with an insn from the target, the
6018      target label gets its uses decremented (even deleted if falling to zero),
6019      thus possibly creating more condexec opportunities there.
6020    Therefore, we should still be prepared to apply condexec optimization on
6021    non-prepared branches if the size increase of conditionalized insns is no
6022    more than the size saved from eliminating the branch.  An invocation option
6023    could also be used to reserve a bit of extra size for condbranches so that
6024    this'll work more often (could also test in arc_reorg if the block is
6025    'close enough' to be eligible for condexec to make this likely, and
6026    estimate required size increase).  */
6027   /* Generate BRcc insns, by combining cmp and Bcc insns wherever possible.  */
6028   if (TARGET_NO_BRCC_SET)
6029     return;
6030 
6031   do
6032     {
6033       init_insn_lengths ();
6034       changed = 0;
6035 
6036       if (optimize > 1 && !TARGET_NO_COND_EXEC)
6037 	{
6038 	  arc_ifcvt ();
6039 	  unsigned int flags = pass_data_arc_ifcvt.todo_flags_finish;
6040 	  df_finish_pass ((flags & TODO_df_verify) != 0);
6041 	}
6042 
6043       /* Call shorten_branches to calculate the insn lengths.  */
6044       shorten_branches (get_insns ());
6045       cfun->machine->ccfsm_current_insn = NULL_RTX;
6046 
6047       if (!INSN_ADDRESSES_SET_P ())
6048 	fatal_error (input_location, "insn addresses not set after shorten_branches");
6049 
6050       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6051 	{
6052 	  rtx label;
6053 	  enum attr_type insn_type;
6054 
6055 	  /* If a non-jump insn (or a casesi jump table), continue.  */
6056 	  if (GET_CODE (insn) != JUMP_INSN
6057 	      || GET_CODE (PATTERN (insn)) == ADDR_VEC
6058 	      || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
6059 	    continue;
6060 
6061 	  /* If we already have a brcc, note if it is suitable for brcc_s.
6062 	     Be a bit generous with the brcc_s range so that we can take
6063 	     advantage of any code shortening from delay slot scheduling.  */
6064 	  if (recog_memoized (insn) == CODE_FOR_cbranchsi4_scratch)
6065 	    {
6066 	      rtx pat = PATTERN (insn);
6067 	      rtx op = XEXP (SET_SRC (XVECEXP (pat, 0, 0)), 0);
6068 	      rtx *ccp = &XEXP (XVECEXP (pat, 0, 1), 0);
6069 
6070 	      offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
6071 	      if ((offset >= -140 && offset < 140)
6072 		  && rtx_equal_p (XEXP (op, 1), const0_rtx)
6073 		  && compact_register_operand (XEXP (op, 0), VOIDmode)
6074 		  && equality_comparison_operator (op, VOIDmode))
6075 		PUT_MODE (*ccp, CC_Zmode);
6076 	      else if (GET_MODE (*ccp) == CC_Zmode)
6077 		PUT_MODE (*ccp, CC_ZNmode);
6078 	      continue;
6079 	    }
6080 	  if ((insn_type = get_attr_type (insn)) == TYPE_BRCC
6081 	      || insn_type == TYPE_BRCC_NO_DELAY_SLOT)
6082 	    continue;
6083 
6084 	  /* OK. so we have a jump insn.  */
6085 	  /* We need to check that it is a bcc.  */
6086 	  /* Bcc => set (pc) (if_then_else ) */
6087 	  pattern = PATTERN (insn);
6088 	  if (GET_CODE (pattern) != SET
6089 	      || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
6090 	      || ANY_RETURN_P (XEXP (SET_SRC (pattern), 1)))
6091 	    continue;
6092 
6093 	  /* Now check if the jump is beyond the s9 range.  */
6094 	  if (CROSSING_JUMP_P (insn))
6095 	    continue;
6096 	  offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
6097 
6098 	  if (offset > 253 || offset < -254)
6099 	    continue;
6100 
6101 	  pc_target = SET_SRC (pattern);
6102 
6103 	  /* Now go back and search for the set cc insn.  */
6104 
6105 	  label = XEXP (pc_target, 1);
6106 
6107 	    {
6108 	      rtx pat;
6109 	      rtx_insn *scan, *link_insn = NULL;
6110 
6111 	      for (scan = PREV_INSN (insn);
6112 		   scan && GET_CODE (scan) != CODE_LABEL;
6113 		   scan = PREV_INSN (scan))
6114 		{
6115 		  if (! INSN_P (scan))
6116 		    continue;
6117 		  pat = PATTERN (scan);
6118 		  if (GET_CODE (pat) == SET
6119 		      && cc_register (SET_DEST (pat), VOIDmode))
6120 		    {
6121 		      link_insn = scan;
6122 		      break;
6123 		    }
6124 		}
6125 	      if (! link_insn)
6126 		continue;
6127 	      else
6128 		/* Check if this is a data dependency.  */
6129 		{
6130 		  rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
6131 		  rtx cmp0, cmp1;
6132 
6133 		  /* OK, this is the set cc; copy the args here.  */
6134 		  op = XEXP (pc_target, 0);
6135 
6136 		  op0 = cmp0 = XEXP (SET_SRC (pat), 0);
6137 		  op1 = cmp1 = XEXP (SET_SRC (pat), 1);
6138 		  if (GET_CODE (op0) == ZERO_EXTRACT
6139 		      && XEXP (op0, 1) == const1_rtx
6140 		      && (GET_CODE (op) == EQ
6141 			  || GET_CODE (op) == NE))
6142 		    {
6143 		      /* btst / b{eq,ne} -> bbit{0,1} */
6144 		      op0 = XEXP (cmp0, 0);
6145 		      op1 = XEXP (cmp0, 2);
6146 		    }
6147 		  else if (!register_operand (op0, VOIDmode)
6148 			  || !general_operand (op1, VOIDmode))
6149 		    continue;
6150 		  /* Be careful not to break what cmpsfpx_raw is
6151 		     trying to create for checking equality of
6152 		     single-precision floats.  */
6153 		  else if (TARGET_SPFP
6154 			   && GET_MODE (op0) == SFmode
6155 			   && GET_MODE (op1) == SFmode)
6156 		    continue;
6157 
6158 		  /* None of the two cmp operands should be set between the
6159 		     cmp and the branch.  */
6160 		  if (reg_set_between_p (op0, link_insn, insn))
6161 		    continue;
6162 
6163 		  if (reg_set_between_p (op1, link_insn, insn))
6164 		    continue;
6165 
6166 		  /* Since the MODE check does not work, check that this is
6167 		     CC reg's last set location before insn, and also no
6168 		     instruction between the cmp and branch uses the
6169 		     condition codes.  */
6170 		  if ((reg_set_between_p (SET_DEST (pat), link_insn, insn))
6171 		      || (reg_used_between_p (SET_DEST (pat), link_insn, insn)))
6172 		    continue;
6173 
6174 		  /* CC reg should be dead after insn.  */
6175 		  if (!find_regno_note (insn, REG_DEAD, CC_REG))
6176 		    continue;
6177 
6178 		  op = gen_rtx_fmt_ee (GET_CODE (op),
6179 				       GET_MODE (op), cmp0, cmp1);
6180 		  /* If we create a LIMM where there was none before,
6181 		     we only benefit if we can avoid a scheduling bubble
6182 		     for the ARC600.  Otherwise, we'd only forgo chances
6183 		     at short insn generation, and risk out-of-range
6184 		     branches.  */
6185 		  if (!brcc_nolimm_operator (op, VOIDmode)
6186 		      && !long_immediate_operand (op1, VOIDmode)
6187 		      && (TARGET_ARC700
6188 			  || next_active_insn (link_insn) != insn))
6189 		    continue;
6190 
6191 		  /* Emit bbit / brcc (or brcc_s if possible).
6192 		     CC_Zmode indicates that brcc_s is possible.  */
6193 
6194 		  if (op0 != cmp0)
6195 		    cc_clob_rtx = gen_rtx_REG (CC_ZNmode, CC_REG);
6196 		  else if ((offset >= -140 && offset < 140)
6197 			   && rtx_equal_p (op1, const0_rtx)
6198 			   && compact_register_operand (op0, VOIDmode)
6199 			   && (GET_CODE (op) == EQ
6200 			       || GET_CODE (op) == NE))
6201 		    cc_clob_rtx = gen_rtx_REG (CC_Zmode, CC_REG);
6202 		  else
6203 		    cc_clob_rtx = gen_rtx_REG (CCmode, CC_REG);
6204 
6205 		  brcc_insn
6206 		    = gen_rtx_IF_THEN_ELSE (VOIDmode, op, label, pc_rtx);
6207 		  brcc_insn = gen_rtx_SET (VOIDmode, pc_rtx, brcc_insn);
6208 		  cc_clob_rtx = gen_rtx_CLOBBER (VOIDmode, cc_clob_rtx);
6209 		  brcc_insn
6210 		    = gen_rtx_PARALLEL
6211 			(VOIDmode, gen_rtvec (2, brcc_insn, cc_clob_rtx));
6212 		  brcc_insn = emit_jump_insn_before (brcc_insn, insn);
6213 
6214 		  JUMP_LABEL (brcc_insn) = JUMP_LABEL (insn);
6215 		  note = find_reg_note (insn, REG_BR_PROB, 0);
6216 		  if (note)
6217 		    {
6218 		      XEXP (note, 1) = REG_NOTES (brcc_insn);
6219 		      REG_NOTES (brcc_insn) = note;
6220 		    }
6221 		  note = find_reg_note (link_insn, REG_DEAD, op0);
6222 		  if (note)
6223 		    {
6224 		      remove_note (link_insn, note);
6225 		      XEXP (note, 1) = REG_NOTES (brcc_insn);
6226 		      REG_NOTES (brcc_insn) = note;
6227 		    }
6228 		  note = find_reg_note (link_insn, REG_DEAD, op1);
6229 		  if (note)
6230 		    {
6231 		      XEXP (note, 1) = REG_NOTES (brcc_insn);
6232 		      REG_NOTES (brcc_insn) = note;
6233 		    }
6234 
6235 		  changed = 1;
6236 
6237 		  /* Delete the bcc insn.  */
6238 		  set_insn_deleted (insn);
6239 
6240 		  /* Delete the cmp insn.  */
6241 		  set_insn_deleted (link_insn);
6242 
6243 		}
6244 	    }
6245 	}
6246       /* Clear out insn_addresses.  */
6247       INSN_ADDRESSES_FREE ();
6248 
6249     } while (changed);
6250 
6251   if (INSN_ADDRESSES_SET_P())
6252     fatal_error (input_location, "insn addresses not freed");
6253 
6254   arc_reorg_in_progress = 0;
6255 }
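
/* Illustration (not part of the build): the BRcc generation above turns
   a sequence like (registers and label arbitrary)
       cmp  r0, r1
       beq  .L1
   into the single compare-and-branch
       breq r0, r1, .L1
   provided the branch distance fits the checked range and the condition
   codes are dead after the jump.  */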
6256 
6257 /* Check if the operands are valid for BRcc.d generation.
6258    Valid BRcc.d patterns are
6259        BRcc.d b, c, s9
6260        BRcc.d b, u6, s9
6261 
6262    For cc={GT, LE, GTU, LEU}, u6=63 cannot be allowed,
6263    since they are encoded by the assembler as {GE, LT, HS, LS} 64, which
6264    does not have a delay slot.
6265 
6266    Assumed precondition: second operand is either a register or a u6
6267    value.  */
6267 
6268 bool
6269 valid_brcc_with_delay_p (rtx *operands)
6270 {
6271   if (optimize_size && GET_MODE (operands[4]) == CC_Zmode)
6272     return false;
6273   return brcc_nolimm_operator (operands[0], VOIDmode);
6274 }
6275 
6276 /* ??? Hack.  This should not really be here.  See PR32143.  */
6277 static bool
6278 arc_decl_anon_ns_mem_p (const_tree decl)
6279 {
6280   while (1)
6281     {
6282       if (decl == NULL_TREE || decl == error_mark_node)
6283 	return false;
6284       if (TREE_CODE (decl) == NAMESPACE_DECL
6285 	  && DECL_NAME (decl) == NULL_TREE)
6286 	return true;
6287       /* Classes and namespaces inside anonymous namespaces have
6288 	 TREE_PUBLIC == 0, so we can shortcut the search.  */
6289       else if (TYPE_P (decl))
6290 	return (TREE_PUBLIC (TYPE_NAME (decl)) == 0);
6291       else if (TREE_CODE (decl) == NAMESPACE_DECL)
6292 	return (TREE_PUBLIC (decl) == 0);
6293       else
6294 	decl = DECL_CONTEXT (decl);
6295     }
6296 }
6297 
6298 /* Implement TARGET_IN_SMALL_DATA_P.  Return true if it would be safe to
6299    access DECL using %gp_rel(...)($gp).  */
6300 
6301 static bool
6302 arc_in_small_data_p (const_tree decl)
6303 {
6304   HOST_WIDE_INT size;
6305 
6306   if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL)
6307     return false;
6308 
6310   /* We don't yet generate small-data references for -mabicalls.  See related
6311      -G handling in override_options.  */
6312   if (TARGET_NO_SDATA_SET)
6313     return false;
6314 
6315   if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0)
6316     {
6317       const char *name;
6318 
6319       /* Reject anything that isn't in a known small-data section.  */
6320       name = DECL_SECTION_NAME (decl);
6321       if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0)
6322 	return false;
6323 
6324       /* If a symbol is defined externally, the assembler will use the
6325 	 usual -G rules when deciding how to implement macros.  */
6326       if (!DECL_EXTERNAL (decl))
6327 	  return true;
6328     }
6329   /* Only global variables go into sdata section for now.  */
6330   else if (1)
6331     {
6332       /* Don't put constants into the small data section: we want them
6333 	 to be in ROM rather than RAM.  */
6334       if (TREE_CODE (decl) != VAR_DECL)
6335 	return false;
6336 
6337       if (TREE_READONLY (decl)
6338 	  && !TREE_SIDE_EFFECTS (decl)
6339 	  && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl))))
6340 	return false;
6341 
6342       /* TREE_PUBLIC might change after the first call, because of the patch
6343 	 for PR19238.  */
6344       if (default_binds_local_p_1 (decl, 1)
6345 	  || arc_decl_anon_ns_mem_p (decl))
6346 	return false;
6347 
6348       /* To ensure -mvolatile-cache works, keep volatile objects out of
6349 	 sdata: ld.di does not have a gp-relative variant.  */
6350       if (TREE_THIS_VOLATILE (decl))
6351 	return false;
6352     }
6353 
6354   /* Disable sdata references to weak variables.  */
6355   if (DECL_WEAK (decl))
6356     return false;
6357 
6358   size = int_size_in_bytes (TREE_TYPE (decl));
6359 
6360 /*   if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) */
6361 /*     return false; */
6362 
6363   /* Allow only <=4B long data types into sdata.  */
6364   return (size > 0 && size <= 4);
6365 }
6366 
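/* A sketch of what this predicate accepts, using hypothetical
   declarations:

       int counter;          // global, 4 bytes, writable: sdata candidate
       volatile int flag;    // rejected: ld.di has no gp-relative form
       const char msg[4];    // rejected: read-only data belongs in ROM
       long long big;        // rejected: 8 bytes exceeds the 4-byte limit

   Only non-weak, writable objects of at most four bytes qualify.  */
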
6367 /* Return true if X is a small data address that can be rewritten
6368    as a gp+symref.  */
6369 
6370 static bool
6371 arc_rewrite_small_data_p (const_rtx x)
6372 {
6373   if (GET_CODE (x) == CONST)
6374     x = XEXP (x, 0);
6375 
6376   if (GET_CODE (x) == PLUS)
6377     {
6378       if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6379 	x = XEXP (x, 0);
6380     }
6381 
6382   return (GET_CODE (x) == SYMBOL_REF
6383 	  && SYMBOL_REF_SMALL_P (x));
6384 }
6385 
6386 /* If possible, rewrite OP so that it refers to small data using
6387    explicit relocations.  */
6388 
6389 rtx
6390 arc_rewrite_small_data (rtx op)
6391 {
6392   op = copy_insn (op);
6393   subrtx_ptr_iterator::array_type array;
6394   FOR_EACH_SUBRTX_PTR (iter, array, &op, ALL)
6395     {
6396       rtx *loc = *iter;
6397       if (arc_rewrite_small_data_p (*loc))
6398 	{
6399 	  gcc_assert (SDATA_BASE_REGNUM == PIC_OFFSET_TABLE_REGNUM);
6400 	  *loc = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, *loc);
6401 	  if (loc != &op)
6402 	    {
6403 	      if (GET_CODE (op) == MEM && &XEXP (op, 0) == loc)
6404 		; /* OK.  */
6405 	      else if (GET_CODE (op) == MEM
6406 		       && GET_CODE (XEXP (op, 0)) == PLUS
6407 		       && GET_CODE (XEXP (XEXP (op, 0), 0)) == MULT)
6408 		*loc = force_reg (Pmode, *loc);
6409 	      else
6410 		gcc_unreachable ();
6411 	    }
6412 	  iter.skip_subrtxes ();
6413 	}
6414       else if (GET_CODE (*loc) == PLUS
6415 	       && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx))
6416 	iter.skip_subrtxes ();
6417     }
6418   return op;
6419 }
6420 
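/* For example (a sketch, not literal RTL dump output), a reference

       (mem:SI (symbol_ref:SI ("foo")))

   where "foo" has SYMBOL_REF_SMALL_P set becomes

       (mem:SI (plus:SI (reg:SI 26 gp) (symbol_ref:SI ("foo"))))

   which can then be emitted as a gp-relative access.  */
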
6421 /* Return true if OP refers to small data symbols directly, not through
6422    a PLUS.  */
6423 
6424 bool
6425 small_data_pattern (rtx op, machine_mode)
6426 {
6427   if (GET_CODE (op) == SEQUENCE)
6428     return false;
6429   subrtx_iterator::array_type array;
6430   FOR_EACH_SUBRTX (iter, array, op, ALL)
6431     {
6432       const_rtx x = *iter;
6433       if (GET_CODE (x) == PLUS
6434 	  && rtx_equal_p (XEXP (x, 0), pic_offset_table_rtx))
6435 	iter.skip_subrtxes ();
6436       else if (arc_rewrite_small_data_p (x))
6437 	return true;
6438     }
6439   return false;
6440 }
6441 
6442 /* Return true if OP is an acceptable memory operand for ARCompact
6443    16-bit gp-relative load instructions.
6444    OP should look like: [r26, symref@sda]
6445    i.e. (mem (plus (reg 26) (symref with the small-data flag set))).  */
6446 /* The volatile cache option is still to be handled.  */
6448 
6449 bool
6450 compact_sda_memory_operand (rtx op, machine_mode mode)
6451 {
6452   rtx addr;
6453   int size;
6454 
6455   /* Eliminate non-memory operations.  */
6456   if (GET_CODE (op) != MEM)
6457     return false;
6458 
6459   if (mode == VOIDmode)
6460     mode = GET_MODE (op);
6461 
6462   size = GET_MODE_SIZE (mode);
6463 
6464   /* dword operations really put out 2 instructions, so eliminate them.  */
6465   if (size > UNITS_PER_WORD)
6466     return false;
6467 
6468   /* Decode the address now.  */
6469   addr = XEXP (op, 0);
6470 
6471   return LEGITIMATE_SMALL_DATA_ADDRESS_P (addr);
6472 }
6473 
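/* E.g. (sketch) the operand accepted here for a 16-bit gp-relative load
   of an SImode small-data object is

       (mem:SI (plus:SI (reg:SI 26 gp) (symbol_ref:SI ("foo"))))

   i.e. the [r26, foo@sda] form described above.  */
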
6474 /* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL.  */
6475 
6476 void
6477 arc_asm_output_aligned_decl_local (FILE * stream, tree decl, const char * name,
6478 				   unsigned HOST_WIDE_INT size,
6479 				   unsigned HOST_WIDE_INT align,
6480 				   unsigned HOST_WIDE_INT globalize_p)
6481 {
6482   int in_small_data = arc_in_small_data_p (decl);
6483 
6484   if (in_small_data)
6485     switch_to_section (get_named_section (NULL, ".sbss", 0));
6486   /*    named_section (0,".sbss",0); */
6487   else
6488     switch_to_section (bss_section);
6489 
6490   if (globalize_p)
6491     (*targetm.asm_out.globalize_label) (stream, name);
6492 
6493   ASM_OUTPUT_ALIGN (stream, floor_log2 ((align) / BITS_PER_UNIT));
6494   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
6495   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
6496   ASM_OUTPUT_LABEL (stream, name);
6497 
6498   if (size != 0)
6499     ASM_OUTPUT_SKIP (stream, size);
6500 }
6501 
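/* For a hypothetical global 4-byte small-data object "foo" with 32-bit
   alignment, the output would be along these lines (the exact directive
   spellings depend on the ASM_OUTPUT_* macro definitions):

       .section .sbss
       .global foo
       .align  2
       .type   foo, @object
       .size   foo, 4
   foo:
       .skip   4
*/
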
6535 /* SIMD builtins support.  */
6536 enum simd_insn_args_type {
6537   Va_Vb_Vc,
6538   Va_Vb_rlimm,
6539   Va_Vb_Ic,
6540   Va_Vb_u6,
6541   Va_Vb_u8,
6542   Va_rlimm_u8,
6543 
6544   Va_Vb,
6545 
6546   void_rlimm,
6547   void_u6,
6548 
6549   Da_u3_rlimm,
6550   Da_rlimm_rlimm,
6551 
6552   Va_Ib_u8,
6553   void_Va_Ib_u8,
6554 
6555   Va_Vb_Ic_u8,
6556   void_Va_u3_Ib_u8
6557 };
6558 
6559 struct builtin_description
6560 {
6561   enum simd_insn_args_type args_type;
6562   const enum insn_code     icode;
6563   const char * const       name;
6564   const enum arc_builtins  code;
6565 };
6566 
6567 static const struct builtin_description arc_simd_builtin_desc_list[] =
6568 {
6569   /* VVV builtins go first.  */
6570 #define SIMD_BUILTIN(type, code, string, builtin) \
6571   { type, CODE_FOR_##code, "__builtin_arc_" string, \
6572     ARC_SIMD_BUILTIN_##builtin },
6573 
6574   SIMD_BUILTIN (Va_Vb_Vc,    vaddaw_insn,   "vaddaw",     VADDAW)
6575   SIMD_BUILTIN (Va_Vb_Vc,     vaddw_insn,    "vaddw",      VADDW)
6576   SIMD_BUILTIN (Va_Vb_Vc,      vavb_insn,     "vavb",       VAVB)
6577   SIMD_BUILTIN (Va_Vb_Vc,     vavrb_insn,    "vavrb",      VAVRB)
6578   SIMD_BUILTIN (Va_Vb_Vc,    vdifaw_insn,   "vdifaw",     VDIFAW)
6579   SIMD_BUILTIN (Va_Vb_Vc,     vdifw_insn,    "vdifw",      VDIFW)
6580   SIMD_BUILTIN (Va_Vb_Vc,    vmaxaw_insn,   "vmaxaw",     VMAXAW)
6581   SIMD_BUILTIN (Va_Vb_Vc,     vmaxw_insn,    "vmaxw",      VMAXW)
6582   SIMD_BUILTIN (Va_Vb_Vc,    vminaw_insn,   "vminaw",     VMINAW)
6583   SIMD_BUILTIN (Va_Vb_Vc,     vminw_insn,    "vminw",      VMINW)
6584   SIMD_BUILTIN (Va_Vb_Vc,    vmulaw_insn,   "vmulaw",     VMULAW)
6585   SIMD_BUILTIN (Va_Vb_Vc,   vmulfaw_insn,  "vmulfaw",    VMULFAW)
6586   SIMD_BUILTIN (Va_Vb_Vc,    vmulfw_insn,   "vmulfw",     VMULFW)
6587   SIMD_BUILTIN (Va_Vb_Vc,     vmulw_insn,    "vmulw",      VMULW)
6588   SIMD_BUILTIN (Va_Vb_Vc,    vsubaw_insn,   "vsubaw",     VSUBAW)
6589   SIMD_BUILTIN (Va_Vb_Vc,     vsubw_insn,    "vsubw",      VSUBW)
6590   SIMD_BUILTIN (Va_Vb_Vc,    vsummw_insn,   "vsummw",     VSUMMW)
6591   SIMD_BUILTIN (Va_Vb_Vc,      vand_insn,     "vand",       VAND)
6592   SIMD_BUILTIN (Va_Vb_Vc,    vandaw_insn,   "vandaw",     VANDAW)
6593   SIMD_BUILTIN (Va_Vb_Vc,      vbic_insn,     "vbic",       VBIC)
6594   SIMD_BUILTIN (Va_Vb_Vc,    vbicaw_insn,   "vbicaw",     VBICAW)
6595   SIMD_BUILTIN (Va_Vb_Vc,       vor_insn,      "vor",        VOR)
6596   SIMD_BUILTIN (Va_Vb_Vc,      vxor_insn,     "vxor",       VXOR)
6597   SIMD_BUILTIN (Va_Vb_Vc,    vxoraw_insn,   "vxoraw",     VXORAW)
6598   SIMD_BUILTIN (Va_Vb_Vc,      veqw_insn,     "veqw",       VEQW)
6599   SIMD_BUILTIN (Va_Vb_Vc,      vlew_insn,     "vlew",       VLEW)
6600   SIMD_BUILTIN (Va_Vb_Vc,      vltw_insn,     "vltw",       VLTW)
6601   SIMD_BUILTIN (Va_Vb_Vc,      vnew_insn,     "vnew",       VNEW)
6602   SIMD_BUILTIN (Va_Vb_Vc,    vmr1aw_insn,   "vmr1aw",     VMR1AW)
6603   SIMD_BUILTIN (Va_Vb_Vc,     vmr1w_insn,    "vmr1w",      VMR1W)
6604   SIMD_BUILTIN (Va_Vb_Vc,    vmr2aw_insn,   "vmr2aw",     VMR2AW)
6605   SIMD_BUILTIN (Va_Vb_Vc,     vmr2w_insn,    "vmr2w",      VMR2W)
6606   SIMD_BUILTIN (Va_Vb_Vc,    vmr3aw_insn,   "vmr3aw",     VMR3AW)
6607   SIMD_BUILTIN (Va_Vb_Vc,     vmr3w_insn,    "vmr3w",      VMR3W)
6608   SIMD_BUILTIN (Va_Vb_Vc,    vmr4aw_insn,   "vmr4aw",     VMR4AW)
6609   SIMD_BUILTIN (Va_Vb_Vc,     vmr4w_insn,    "vmr4w",      VMR4W)
6610   SIMD_BUILTIN (Va_Vb_Vc,    vmr5aw_insn,   "vmr5aw",     VMR5AW)
6611   SIMD_BUILTIN (Va_Vb_Vc,     vmr5w_insn,    "vmr5w",      VMR5W)
6612   SIMD_BUILTIN (Va_Vb_Vc,    vmr6aw_insn,   "vmr6aw",     VMR6AW)
6613   SIMD_BUILTIN (Va_Vb_Vc,     vmr6w_insn,    "vmr6w",      VMR6W)
6614   SIMD_BUILTIN (Va_Vb_Vc,    vmr7aw_insn,   "vmr7aw",     VMR7AW)
6615   SIMD_BUILTIN (Va_Vb_Vc,     vmr7w_insn,    "vmr7w",      VMR7W)
6616   SIMD_BUILTIN (Va_Vb_Vc,      vmrb_insn,     "vmrb",       VMRB)
6617   SIMD_BUILTIN (Va_Vb_Vc,    vh264f_insn,   "vh264f",     VH264F)
6618   SIMD_BUILTIN (Va_Vb_Vc,   vh264ft_insn,  "vh264ft",    VH264FT)
6619   SIMD_BUILTIN (Va_Vb_Vc,   vh264fw_insn,  "vh264fw",    VH264FW)
6620   SIMD_BUILTIN (Va_Vb_Vc,     vvc1f_insn,    "vvc1f",      VVC1F)
6621   SIMD_BUILTIN (Va_Vb_Vc,    vvc1ft_insn,   "vvc1ft",     VVC1FT)
6622 
6623   SIMD_BUILTIN (Va_Vb_rlimm,    vbaddw_insn,   "vbaddw",     VBADDW)
6624   SIMD_BUILTIN (Va_Vb_rlimm,    vbmaxw_insn,   "vbmaxw",     VBMAXW)
6625   SIMD_BUILTIN (Va_Vb_rlimm,    vbminw_insn,   "vbminw",     VBMINW)
6626   SIMD_BUILTIN (Va_Vb_rlimm,   vbmulaw_insn,  "vbmulaw",    VBMULAW)
6627   SIMD_BUILTIN (Va_Vb_rlimm,   vbmulfw_insn,  "vbmulfw",    VBMULFW)
6628   SIMD_BUILTIN (Va_Vb_rlimm,    vbmulw_insn,   "vbmulw",     VBMULW)
6629   SIMD_BUILTIN (Va_Vb_rlimm,   vbrsubw_insn,  "vbrsubw",    VBRSUBW)
6630   SIMD_BUILTIN (Va_Vb_rlimm,    vbsubw_insn,   "vbsubw",     VBSUBW)
6631 
6632   /* Va, Vb, Ic instructions.  */
6633   SIMD_BUILTIN (Va_Vb_Ic,        vasrw_insn,    "vasrw",      VASRW)
6634   SIMD_BUILTIN (Va_Vb_Ic,         vsr8_insn,     "vsr8",       VSR8)
6635   SIMD_BUILTIN (Va_Vb_Ic,       vsr8aw_insn,   "vsr8aw",     VSR8AW)
6636 
6637   /* Va, Vb, u6 instructions.  */
6638   SIMD_BUILTIN (Va_Vb_u6,      vasrrwi_insn,  "vasrrwi",    VASRRWi)
6639   SIMD_BUILTIN (Va_Vb_u6,     vasrsrwi_insn, "vasrsrwi",   VASRSRWi)
6640   SIMD_BUILTIN (Va_Vb_u6,       vasrwi_insn,   "vasrwi",     VASRWi)
6641   SIMD_BUILTIN (Va_Vb_u6,     vasrpwbi_insn, "vasrpwbi",   VASRPWBi)
6642   SIMD_BUILTIN (Va_Vb_u6,    vasrrpwbi_insn, "vasrrpwbi",  VASRRPWBi)
6643   SIMD_BUILTIN (Va_Vb_u6,      vsr8awi_insn,  "vsr8awi",    VSR8AWi)
6644   SIMD_BUILTIN (Va_Vb_u6,        vsr8i_insn,    "vsr8i",      VSR8i)
6645 
6646   /* Va, Vb, u8 (simm) instructions.  */
6647   SIMD_BUILTIN (Va_Vb_u8,        vmvaw_insn,    "vmvaw",      VMVAW)
6648   SIMD_BUILTIN (Va_Vb_u8,         vmvw_insn,     "vmvw",       VMVW)
6649   SIMD_BUILTIN (Va_Vb_u8,        vmvzw_insn,    "vmvzw",      VMVZW)
6650   SIMD_BUILTIN (Va_Vb_u8,      vd6tapf_insn,  "vd6tapf",    VD6TAPF)
6651 
6652   /* Va, rlimm, u8 (simm) instructions.  */
6653   SIMD_BUILTIN (Va_rlimm_u8,    vmovaw_insn,   "vmovaw",     VMOVAW)
6654   SIMD_BUILTIN (Va_rlimm_u8,     vmovw_insn,    "vmovw",      VMOVW)
6655   SIMD_BUILTIN (Va_rlimm_u8,    vmovzw_insn,   "vmovzw",     VMOVZW)
6656 
6657   /* Va, Vb instructions.  */
6658   SIMD_BUILTIN (Va_Vb,          vabsaw_insn,   "vabsaw",     VABSAW)
6659   SIMD_BUILTIN (Va_Vb,           vabsw_insn,    "vabsw",      VABSW)
6660   SIMD_BUILTIN (Va_Vb,         vaddsuw_insn,  "vaddsuw",    VADDSUW)
6661   SIMD_BUILTIN (Va_Vb,          vsignw_insn,   "vsignw",     VSIGNW)
6662   SIMD_BUILTIN (Va_Vb,          vexch1_insn,   "vexch1",     VEXCH1)
6663   SIMD_BUILTIN (Va_Vb,          vexch2_insn,   "vexch2",     VEXCH2)
6664   SIMD_BUILTIN (Va_Vb,          vexch4_insn,   "vexch4",     VEXCH4)
6665   SIMD_BUILTIN (Va_Vb,          vupbaw_insn,   "vupbaw",     VUPBAW)
6666   SIMD_BUILTIN (Va_Vb,           vupbw_insn,    "vupbw",      VUPBW)
6667   SIMD_BUILTIN (Va_Vb,         vupsbaw_insn,  "vupsbaw",    VUPSBAW)
6668   SIMD_BUILTIN (Va_Vb,          vupsbw_insn,   "vupsbw",     VUPSBW)
6669 
6670   /* DIb, rlimm, rlimm instructions.  */
6671   SIMD_BUILTIN (Da_rlimm_rlimm,  vdirun_insn,  "vdirun",     VDIRUN)
6672   SIMD_BUILTIN (Da_rlimm_rlimm,  vdorun_insn,  "vdorun",     VDORUN)
6673 
6674   /* DIb, u3, rlimm instructions.  */
6675   SIMD_BUILTIN (Da_u3_rlimm,   vdiwr_insn,    "vdiwr",      VDIWR)
6676   SIMD_BUILTIN (Da_u3_rlimm,    vdowr_insn,    "vdowr",     VDOWR)
6677 
6678   /* rlimm instructions.  */
6679   SIMD_BUILTIN (void_rlimm,        vrec_insn,     "vrec",      VREC)
6680   SIMD_BUILTIN (void_rlimm,        vrun_insn,     "vrun",      VRUN)
6681   SIMD_BUILTIN (void_rlimm,     vrecrun_insn,  "vrecrun",   VRECRUN)
6682   SIMD_BUILTIN (void_rlimm,     vendrec_insn,  "vendrec",   VENDREC)
6683 
6684   /* Va, [Ib,u8] instructions.  */
6685   SIMD_BUILTIN (Va_Vb_Ic_u8,       vld32wh_insn,  "vld32wh",   VLD32WH)
6686   SIMD_BUILTIN (Va_Vb_Ic_u8,       vld32wl_insn,  "vld32wl",   VLD32WL)
6687   SIMD_BUILTIN (Va_Vb_Ic_u8,         vld64_insn,    "vld64",     VLD64)
6688   SIMD_BUILTIN (Va_Vb_Ic_u8,         vld32_insn,    "vld32",     VLD32)
6689 
6690   SIMD_BUILTIN (Va_Ib_u8,           vld64w_insn,   "vld64w",   VLD64W)
6691   SIMD_BUILTIN (Va_Ib_u8,           vld128_insn,   "vld128",   VLD128)
6692   SIMD_BUILTIN (void_Va_Ib_u8,      vst128_insn,   "vst128",   VST128)
6693   SIMD_BUILTIN (void_Va_Ib_u8,       vst64_insn,    "vst64",    VST64)
6694 
6695   /* Va, u3, [Ib, u8] instructions.  */
6696   SIMD_BUILTIN (void_Va_u3_Ib_u8,  vst16_n_insn,  "vst16_n",   VST16_N)
6697   SIMD_BUILTIN (void_Va_u3_Ib_u8,  vst32_n_insn,  "vst32_n",   VST32_N)
6698 
6699   SIMD_BUILTIN (void_u6,  vinti_insn,  "vinti",   VINTI)
6700 };
6701 
6702 static void
6703 arc_init_simd_builtins (void)
6704 {
6705   int i;
6706   tree endlink = void_list_node;
6707   tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
6708 
6709   tree v8hi_ftype_v8hi_v8hi
6710     = build_function_type (V8HI_type_node,
6711 			   tree_cons (NULL_TREE, V8HI_type_node,
6712 				      tree_cons (NULL_TREE, V8HI_type_node,
6713 						 endlink)));
6714   tree v8hi_ftype_v8hi_int
6715     = build_function_type (V8HI_type_node,
6716 			   tree_cons (NULL_TREE, V8HI_type_node,
6717 				      tree_cons (NULL_TREE, integer_type_node,
6718 						 endlink)));
6719 
6720   tree v8hi_ftype_v8hi_int_int
6721     = build_function_type (V8HI_type_node,
6722 			   tree_cons (NULL_TREE, V8HI_type_node,
6723 				      tree_cons (NULL_TREE, integer_type_node,
6724 						 tree_cons (NULL_TREE,
6725 							    integer_type_node,
6726 							    endlink))));
6727 
6728   tree void_ftype_v8hi_int_int
6729     = build_function_type (void_type_node,
6730 			   tree_cons (NULL_TREE, V8HI_type_node,
6731 				      tree_cons (NULL_TREE, integer_type_node,
6732 						 tree_cons (NULL_TREE,
6733 							    integer_type_node,
6734 							    endlink))));
6735 
6736   tree void_ftype_v8hi_int_int_int
6737     = (build_function_type
6738 	(void_type_node,
6739 	 tree_cons (NULL_TREE, V8HI_type_node,
6740 		    tree_cons (NULL_TREE, integer_type_node,
6741 			       tree_cons (NULL_TREE, integer_type_node,
6742 					  tree_cons (NULL_TREE,
6743 						     integer_type_node,
6744 						     endlink))))));
6745 
6746   tree v8hi_ftype_int_int
6747     = build_function_type (V8HI_type_node,
6748 			   tree_cons (NULL_TREE, integer_type_node,
6749 				      tree_cons (NULL_TREE, integer_type_node,
6750 						 endlink)));
6751 
6752   tree void_ftype_int_int
6753     = build_function_type (void_type_node,
6754 			   tree_cons (NULL_TREE, integer_type_node,
6755 				      tree_cons (NULL_TREE, integer_type_node,
6756 						 endlink)));
6757 
6758   tree void_ftype_int
6759     = build_function_type (void_type_node,
6760 			   tree_cons (NULL_TREE, integer_type_node, endlink));
6761 
6762   tree v8hi_ftype_v8hi
6763     = build_function_type (V8HI_type_node, tree_cons (NULL_TREE, V8HI_type_node,
6764 						      endlink));
6765 
6766   /* These asserts have been introduced to ensure that the order of builtins
6767      does not get messed up, else the initialization goes wrong.  */
6768   gcc_assert (arc_simd_builtin_desc_list [0].args_type == Va_Vb_Vc);
6769   for (i = 0; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Vc; i++)
6770     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6771 		  v8hi_ftype_v8hi_v8hi, arc_simd_builtin_desc_list[i].code);
6772 
6773   gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_rlimm);
6774   for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_rlimm; i++)
6775     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6776 		  v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code);
6777 
6778   gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic);
6779   for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic; i++)
6780     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6781 		  v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code);
6782 
6783   gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_u6);
6784   for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_u6; i++)
6785     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6786 		  v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code);
6787 
6788   gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_u8);
6789   for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_u8; i++)
6790     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6791 		  v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code);
6792 
6793   gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_rlimm_u8);
6794   for (; arc_simd_builtin_desc_list [i].args_type == Va_rlimm_u8; i++)
6795     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6796 		  v8hi_ftype_int_int, arc_simd_builtin_desc_list[i].code);
6797 
6798   gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb);
6799   for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb; i++)
6800     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6801 		  v8hi_ftype_v8hi, arc_simd_builtin_desc_list[i].code);
6802 
6803   gcc_assert (arc_simd_builtin_desc_list [i].args_type == Da_rlimm_rlimm);
6804   for (; arc_simd_builtin_desc_list [i].args_type == Da_rlimm_rlimm; i++)
6805     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name,
6806 		  void_ftype_int_int, arc_simd_builtin_desc_list[i].code);
6807 
6808   gcc_assert (arc_simd_builtin_desc_list [i].args_type == Da_u3_rlimm);
6809   for (; arc_simd_builtin_desc_list [i].args_type == Da_u3_rlimm; i++)
6810     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6811 		  void_ftype_int_int, arc_simd_builtin_desc_list[i].code);
6812 
6813   gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_rlimm);
6814   for (; arc_simd_builtin_desc_list [i].args_type == void_rlimm; i++)
6815     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6816 		  void_ftype_int, arc_simd_builtin_desc_list[i].code);
6817 
6818   gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic_u8);
6819   for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic_u8; i++)
6820     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6821 		  v8hi_ftype_v8hi_int_int, arc_simd_builtin_desc_list[i].code);
6822 
6823   gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Ib_u8);
6824   for (; arc_simd_builtin_desc_list [i].args_type == Va_Ib_u8; i++)
6825     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6826 		  v8hi_ftype_int_int, arc_simd_builtin_desc_list[i].code);
6827 
6828   gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_Va_Ib_u8);
6829   for (; arc_simd_builtin_desc_list [i].args_type == void_Va_Ib_u8; i++)
6830     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name,
6831 		  void_ftype_v8hi_int_int, arc_simd_builtin_desc_list[i].code);
6832 
6833   gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_Va_u3_Ib_u8);
6834   for (; arc_simd_builtin_desc_list [i].args_type == void_Va_u3_Ib_u8; i++)
6835     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6836 		  void_ftype_v8hi_int_int_int,
6837 		  arc_simd_builtin_desc_list[i].code);
6838 
6839   gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_u6);
6840   for (; arc_simd_builtin_desc_list [i].args_type == void_u6; i++)
6841     def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6842 		  void_ftype_int, arc_simd_builtin_desc_list[i].code);
6843 
6844   gcc_assert (i == ARRAY_SIZE (arc_simd_builtin_desc_list));
6845 }
6846 
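/* Illustrative use of one of these builtins from user code -- a sketch
   which assumes the user supplies a matching 128-bit vector type:

     typedef short v8hi __attribute__ ((vector_size (16)));

     v8hi add_vectors (v8hi a, v8hi b)
     {
       return __builtin_arc_vaddw (a, b);   // Va_Vb_Vc form
     }

   The builtins are only registered when the SIMD extension is enabled,
   since def_mbuiltin is gated on TARGET_SIMD_SET.  */
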
6847 /* Helper function of arc_expand_builtin; has the same parameters,
6848    except that EXP is now known to be a call to a simd builtin.  */
6849 
6850 static rtx
6851 arc_expand_simd_builtin (tree exp,
6852 			 rtx target,
6853 			 rtx subtarget ATTRIBUTE_UNUSED,
6854 			 machine_mode mode ATTRIBUTE_UNUSED,
6855 			 int ignore ATTRIBUTE_UNUSED)
6856 {
6857   tree              fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6858   tree              arg0;
6859   tree              arg1;
6860   tree              arg2;
6861   tree              arg3;
6862   rtx               op0;
6863   rtx               op1;
6864   rtx               op2;
6865   rtx               op3;
6866   rtx               op4;
6867   rtx pat;
6868   unsigned int         i;
6869   int               fcode = DECL_FUNCTION_CODE (fndecl);
6870   int               icode;
6871   machine_mode mode0;
6872   machine_mode mode1;
6873   machine_mode mode2;
6874   machine_mode mode3;
6875   machine_mode mode4;
6876   const struct builtin_description * d;
6877 
6878   for (i = 0, d = arc_simd_builtin_desc_list;
6879        i < ARRAY_SIZE (arc_simd_builtin_desc_list); i++, d++)
6880     if (d->code == (const enum arc_builtins) fcode)
6881       break;
6882 
6883   /* We must get an entry here.  */
6884   gcc_assert (i < ARRAY_SIZE (arc_simd_builtin_desc_list));
6885 
6886   switch (d->args_type)
6887     {
6888     case Va_Vb_rlimm:
6889       icode = d->icode;
6890       arg0 = CALL_EXPR_ARG (exp, 0);
6891       arg1 = CALL_EXPR_ARG (exp, 1);
6892       op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
6893       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
6894 
6895       target = gen_reg_rtx (V8HImode);
6896       mode0 =  insn_data[icode].operand[1].mode;
6897       mode1 =  insn_data[icode].operand[2].mode;
6898 
6899       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6900 	op0 = copy_to_mode_reg (mode0, op0);
6901 
6902       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
6903 	  op1 = copy_to_mode_reg (mode1, op1);
6904 
6905       pat = GEN_FCN (icode) (target, op0, op1);
6906       if (! pat)
6907 	return 0;
6908 
6909       emit_insn (pat);
6910       return target;
6911 
6912     case Va_Vb_u6:
6913     case Va_Vb_u8:
6914       icode = d->icode;
6915       arg0 = CALL_EXPR_ARG (exp, 0);
6916       arg1 = CALL_EXPR_ARG (exp, 1);
6917       op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
6918       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
6919 
6920       target = gen_reg_rtx (V8HImode);
6921       mode0 =  insn_data[icode].operand[1].mode;
6922       mode1 =  insn_data[icode].operand[2].mode;
6923 
6924       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6925 	op0 = copy_to_mode_reg (mode0, op0);
6926 
6927       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)
6928 	  ||  (d->args_type == Va_Vb_u6 && !UNSIGNED_INT6 (INTVAL (op1)))
6929 	  ||  (d->args_type == Va_Vb_u8 && !UNSIGNED_INT8 (INTVAL (op1))))
6930 	error ("operand 2 of %s instruction should be an unsigned %d-bit value",
6931 	       d->name,
6932 	       (d->args_type == Va_Vb_u6)? 6: 8);
6933 
6934       pat = GEN_FCN (icode) (target, op0, op1);
6935       if (! pat)
6936 	return 0;
6937 
6938       emit_insn (pat);
6939       return target;
6940 
6941     case Va_rlimm_u8:
6942       icode = d->icode;
6943       arg0 = CALL_EXPR_ARG (exp, 0);
6944       arg1 = CALL_EXPR_ARG (exp, 1);
6945       op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
6946       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
6947 
6948       target = gen_reg_rtx (V8HImode);
6949       mode0 =  insn_data[icode].operand[1].mode;
6950       mode1 =  insn_data[icode].operand[2].mode;
6951 
6952       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6953 	op0 = copy_to_mode_reg (mode0, op0);
6954 
6955       if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
6956 	   || !(UNSIGNED_INT8 (INTVAL (op1))))
6957 	error ("operand 2 of %s instruction should be an unsigned 8-bit value",
6958 	       d->name);
6959 
6960       pat = GEN_FCN (icode) (target, op0, op1);
6961       if (! pat)
6962 	return 0;
6963 
6964       emit_insn (pat);
6965       return target;
6966 
6967     case Va_Vb_Ic:
6968       icode = d->icode;
6969       arg0 = CALL_EXPR_ARG (exp, 0);
6970       arg1 = CALL_EXPR_ARG (exp, 1);
6971       op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
6972       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
6973       op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
6974 
6975       target = gen_reg_rtx (V8HImode);
6976       mode0 =  insn_data[icode].operand[1].mode;
6977       mode1 =  insn_data[icode].operand[2].mode;
6978 
6979       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6980 	op0 = copy_to_mode_reg (mode0, op0);
6981 
6982       if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
6983 	   || !(UNSIGNED_INT3 (INTVAL (op1))))
6984 	error ("operand 2 of %s instruction should be an unsigned 3-bit value (I0-I7)",
6985 	       d->name);
6986 
6987       pat = GEN_FCN (icode) (target, op0, op1, op2);
6988       if (! pat)
6989 	return 0;
6990 
6991       emit_insn (pat);
6992       return target;
6993 
6994     case Va_Vb_Vc:
6995       icode = d->icode;
6996       arg0 = CALL_EXPR_ARG (exp, 0);
6997       arg1 = CALL_EXPR_ARG (exp, 1);
6998       op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
6999       op1 = expand_expr (arg1, NULL_RTX, V8HImode, EXPAND_NORMAL);
7000 
7001       target = gen_reg_rtx (V8HImode);
7002       mode0 =  insn_data[icode].operand[1].mode;
7003       mode1 =  insn_data[icode].operand[2].mode;
7004 
7005       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7006 	op0 = copy_to_mode_reg (mode0, op0);
7007 
7008       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7009 	op1 = copy_to_mode_reg (mode1, op1);
7010 
7011       pat = GEN_FCN (icode) (target, op0, op1);
7012       if (! pat)
7013 	return 0;
7014 
7015       emit_insn (pat);
7016       return target;
7017 
7018     case Va_Vb:
7019       icode = d->icode;
7020       arg0 = CALL_EXPR_ARG (exp, 0);
7021       op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
7022 
7023       target = gen_reg_rtx (V8HImode);
7024       mode0 =  insn_data[icode].operand[1].mode;
7025 
7026       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7027 	op0 = copy_to_mode_reg (mode0, op0);
7028 
7029       pat = GEN_FCN (icode) (target, op0);
7030       if (! pat)
7031 	return 0;
7032 
7033       emit_insn (pat);
7034       return target;
7035 
7036     case Da_rlimm_rlimm:
7037       icode = d->icode;
7038       arg0 = CALL_EXPR_ARG (exp, 0);
7039       arg1 = CALL_EXPR_ARG (exp, 1);
7040       op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7041       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7042 
7043 
7044       if (icode == CODE_FOR_vdirun_insn)
7045 	target = gen_rtx_REG (SImode, 131);
7046       else if (icode == CODE_FOR_vdorun_insn)
7047 	target = gen_rtx_REG (SImode, 139);
7048       else
7049 	  gcc_unreachable ();
7050 
7051       mode0 =  insn_data[icode].operand[1].mode;
7052       mode1 =  insn_data[icode].operand[2].mode;
7053 
7054       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7055 	op0 = copy_to_mode_reg (mode0, op0);
7056 
7057       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7058 	op1 = copy_to_mode_reg (mode1, op1);
7059 
7060 
7061       pat = GEN_FCN (icode) (target, op0, op1);
7062       if (! pat)
7063 	return 0;
7064 
7065       emit_insn (pat);
7066       return NULL_RTX;
7067 
7068     case Da_u3_rlimm:
7069       icode = d->icode;
7070       arg0 = CALL_EXPR_ARG (exp, 0);
7071       arg1 = CALL_EXPR_ARG (exp, 1);
7072       op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7073       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7074 
7075 
7076       if (! (GET_CODE (op0) == CONST_INT)
7077 	  || !(UNSIGNED_INT3 (INTVAL (op0))))
7078 	error ("operand 1 of %s instruction should be an unsigned 3-bit value (DR0-DR7)",
7079 	       d->name);
7080 
7081       mode1 =  insn_data[icode].operand[1].mode;
7082 
7083       if (icode == CODE_FOR_vdiwr_insn)
7084 	target = gen_rtx_REG (SImode,
7085 			      ARC_FIRST_SIMD_DMA_CONFIG_IN_REG + INTVAL (op0));
7086       else if (icode == CODE_FOR_vdowr_insn)
7087 	target = gen_rtx_REG (SImode,
7088 			      ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG + INTVAL (op0));
7089       else
7090 	gcc_unreachable ();
7091 
7092       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7093 	op1 = copy_to_mode_reg (mode1, op1);
7094 
7095       pat = GEN_FCN (icode) (target, op1);
7096       if (! pat)
7097 	return 0;
7098 
7099       emit_insn (pat);
7100       return NULL_RTX;
7101 
7102     case void_u6:
7103       icode = d->icode;
7104       arg0 = CALL_EXPR_ARG (exp, 0);
7105 
7106       fold (arg0);
7107 
7108       op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7109       mode0 = insn_data[icode].operand[0].mode;
7110 
7111       /* op0 should be u6.  */
7112       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)
7113 	  || !(UNSIGNED_INT6 (INTVAL (op0))))
7114 	error ("operand of %s instruction should be an unsigned 6-bit value",
7115 	       d->name);
7116 
7117       pat = GEN_FCN (icode) (op0);
7118       if (! pat)
7119 	return 0;
7120 
7121       emit_insn (pat);
7122       return NULL_RTX;
7123 
7124     case void_rlimm:
7125       icode = d->icode;
7126       arg0 = CALL_EXPR_ARG (exp, 0);
7127 
7128       fold (arg0);
7129 
7130       op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7131       mode0 = insn_data[icode].operand[0].mode;
7132 
7133       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
7134 	op0 = copy_to_mode_reg (mode0, op0);
7135 
7136       pat = GEN_FCN (icode) (op0);
7137       if (! pat)
7138 	return 0;
7139 
7140       emit_insn (pat);
7141       return NULL_RTX;
7142 
7143     case Va_Vb_Ic_u8:
7144       {
7145 	rtx src_vreg;
7146 	icode = d->icode;
7147 	arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg */
7148 	arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */
7149 	arg2 = CALL_EXPR_ARG (exp, 2); /* u8 */
7150 
7151 	src_vreg = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
7152 	op0 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);  /* [I]0-7 */
7153 	op1 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);  /* u8 */
7154 	op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);	    /* VR0 */
7155 
7156 	/* target <- src vreg */
7157 	emit_insn (gen_move_insn (target, src_vreg));
7158 
7159 	/* target <- vec_concat: target, mem(Ib, u8) */
7160 	mode0 =  insn_data[icode].operand[3].mode;
7161 	mode1 =  insn_data[icode].operand[1].mode;
7162 
7163 	if ( (!(*insn_data[icode].operand[3].predicate) (op0, mode0))
7164 	     || !(UNSIGNED_INT3 (INTVAL (op0))))
7165 	  error ("operand 1 of %s instruction should be an unsigned 3-bit value (I0-I7)",
7166 		 d->name);
7167 
7168 	if ( (!(*insn_data[icode].operand[1].predicate) (op1, mode1))
7169 	     || !(UNSIGNED_INT8 (INTVAL (op1))))
7170 	  error ("operand 2 of %s instruction should be an unsigned 8-bit value",
7171 		 d->name);
7172 
7173 	pat = GEN_FCN (icode) (target, op1, op2, op0);
7174 	if (! pat)
7175 	  return 0;
7176 
7177 	emit_insn (pat);
7178 	return target;
7179       }
7180 
7181     case void_Va_Ib_u8:
7182       icode = d->icode;
7183       arg0 = CALL_EXPR_ARG (exp, 0); /* src vreg */
7184       arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */
7185       arg2 = CALL_EXPR_ARG (exp, 2); /* u8 */
7186 
7187       op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);         /* VR0    */
7188       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);   /* I[0-7] */
7189       op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);   /* u8     */
7190       op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); /* Vdest  */
7191 
7192       mode0 =  insn_data[icode].operand[0].mode;
7193       mode1 =  insn_data[icode].operand[1].mode;
7194       mode2 =  insn_data[icode].operand[2].mode;
7195       mode3 =  insn_data[icode].operand[3].mode;
7196 
7197       if ( (!(*insn_data[icode].operand[1].predicate) (op1, mode1))
7198 	   || !(UNSIGNED_INT3 (INTVAL (op1))))
7199 	error ("operand 2 of %s instruction should be an unsigned 3-bit value (I0-I7)",
7200 	       d->name);
7201 
7202       if ( (!(*insn_data[icode].operand[2].predicate) (op2, mode2))
7203 	   || !(UNSIGNED_INT8 (INTVAL (op2))))
7204 	error ("operand 3 of %s instruction should be an unsigned 8-bit value",
7205 	       d->name);
7206 
7207       if (!(*insn_data[icode].operand[3].predicate) (op3, mode3))
7208 	op3 = copy_to_mode_reg (mode3, op3);
7209 
7210       pat = GEN_FCN (icode) (op0, op1, op2, op3);
7211       if (! pat)
7212 	return 0;
7213 
7214       emit_insn (pat);
7215       return NULL_RTX;
7216 
7217     case Va_Ib_u8:
7218       icode = d->icode;
7219       arg0 = CALL_EXPR_ARG (exp, 0); /* dest vreg */
7220       arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */
7221 
7222       op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);       /* VR0    */
7223       op1 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); /* I[0-7] */
7224       op2 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* u8     */
7225 
7226       /* target <- src vreg */
7227       target = gen_reg_rtx (V8HImode);
7228 
7229       /* target <- vec_concat: target, mem(Ib, u8) */
7230       mode0 =  insn_data[icode].operand[1].mode;
7231       mode1 =  insn_data[icode].operand[2].mode;
7232       mode2 =  insn_data[icode].operand[3].mode;
7233 
7234       if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
7235 	   || !(UNSIGNED_INT3 (INTVAL (op1))))
7236 	error ("operand 1 of %s instruction should be an unsigned 3-bit value (I0-I7)",
7237 	       d->name);
7238 
7239       if ( (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
7240 	   || !(UNSIGNED_INT8 (INTVAL (op2))))
7241 	error ("operand 2 of %s instruction should be an unsigned 8-bit value",
7242 	       d->name);
7243 
7244       pat = GEN_FCN (icode) (target, op0, op1, op2);
7245       if (! pat)
7246 	return 0;
7247 
7248       emit_insn (pat);
7249       return target;
7250 
7251     case void_Va_u3_Ib_u8:
7252       icode = d->icode;
7253       arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg */
7254       arg1 = CALL_EXPR_ARG (exp, 1); /* u3 */
7255       arg2 = CALL_EXPR_ARG (exp, 2); /* [I]0-7 */
7256       arg3 = CALL_EXPR_ARG (exp, 3); /* u8 */
7257 
7258       op0 = expand_expr (arg3, NULL_RTX, SImode, EXPAND_NORMAL); /* u8        */
7259       op1 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);       /* VR        */
7260       op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); /* [I]0-7    */
7261       op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);/* vreg to be stored */
7262       op4 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);  /* vreg 0-7 subreg no. */
7263 
7264       mode0 =  insn_data[icode].operand[0].mode;
7265       mode2 =  insn_data[icode].operand[2].mode;
7266       mode3 =  insn_data[icode].operand[3].mode;
7267       mode4 =  insn_data[icode].operand[4].mode;
7268 
7269       /* Do some correctness checks for the operands.  */
7270       if ( (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
7271 	   || !(UNSIGNED_INT8 (INTVAL (op0))))
7272 	error ("operand 4 of %s instruction should be an unsigned 8-bit value (0-255)",
7273 	       d->name);
7274 
7275       if ( (!(*insn_data[icode].operand[2].predicate) (op2, mode2))
7276 	   || !(UNSIGNED_INT3 (INTVAL (op2))))
7277 	error ("operand 3 of %s instruction should be an unsigned 3-bit value (I0-I7)",
7278 	       d->name);
7279 
7280       if (!(*insn_data[icode].operand[3].predicate) (op3, mode3))
7281 	op3 = copy_to_mode_reg (mode3, op3);
7282 
7283       if ( (!(*insn_data[icode].operand[4].predicate) (op4, mode4))
7284 	   || !(UNSIGNED_INT3 (INTVAL (op4))))
7285 	error ("operand 2 of %s instruction should be an unsigned 3-bit value (subreg 0-7)",
7286 	       d->name);
7287       else if (icode == CODE_FOR_vst32_n_insn
7288 	       && ((INTVAL (op4) % 2) != 0))
7289 	error ("operand 2 of %s instruction should be an even 3-bit value (subreg 0,2,4,6)",
7290 	       d->name);
7291 
7292       pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
7293       if (! pat)
7294 	return 0;
7295 
7296       emit_insn (pat);
7297       return NULL_RTX;
7298 
7299     default:
7300       gcc_unreachable ();
7301     }
7302   return NULL_RTX;
7303 }
7304 
7305 static bool
7306 arc_preserve_reload_p (rtx in)
7307 {
7308   return (GET_CODE (in) == PLUS
7309 	  && RTX_OK_FOR_BASE_P (XEXP (in, 0), true)
7310 	  && CONST_INT_P (XEXP (in, 1))
7311 	  && !((INTVAL (XEXP (in, 1)) & 511)));
7312 }
7313 
7314 int
7315 arc_register_move_cost (machine_mode,
7316 			enum reg_class from_class, enum reg_class to_class)
7317 {
7318   /* The ARC600 has no bypass for extension registers, hence a nop might be
7319      needed to be inserted after a write so that reads are safe.  */
7320   if (TARGET_ARC600)
7321     {
7322       if (to_class == MPY_WRITABLE_CORE_REGS)
7323 	return 3;
7324      /* Instructions modifying LP_COUNT need 4 additional cycles before
7325 	the register will actually contain the value.  */
7326       else if (to_class == LPCOUNT_REG)
7327 	return 6;
7328       else if (to_class == WRITABLE_CORE_REGS)
7329 	return 6;
7330     }
7331 
7332   /* The ARC700 stalls for 3 cycles when *reading* from lp_count.  */
7333   if (TARGET_ARC700
7334       && (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS
7335 	  || from_class == WRITABLE_CORE_REGS))
7336     return 8;
7337 
7338   /* Force an attempt to 'mov Dy,Dx' to spill.  */
7339   if (TARGET_ARC700 && TARGET_DPFP
7340       && from_class == DOUBLE_REGS && to_class == DOUBLE_REGS)
7341     return 100;
7342 
7343   return 2;
7344 }
7345 
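/* So, for example, an ARC600 move into LP_COUNT is costed at 6 to
   reflect the latency noted above, while an ARC700 'mov Dy,Dx' between
   DOUBLE_REGS is costed at 100 purely so that reload spills it through
   memory instead.  */
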
7346 /* Emit code for an addsi3 instruction with OPERANDS.
7347    COND_P indicates if this will use conditional execution.
7348    Return the length of the instruction.
7349    If OUTPUT_P is false, don't actually output the instruction, just return
7350    its length.  */
7351 int
7352 arc_output_addsi (rtx *operands, bool cond_p, bool output_p)
7353 {
7354   char format[32];
7355 
7356   int match = operands_match_p (operands[0], operands[1]);
7357   int match2 = operands_match_p (operands[0], operands[2]);
7358   int intval = (REG_P (operands[2]) ? 1
7359 		: CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057);
7360   int neg_intval = -intval;
7361   int short_0 = satisfies_constraint_Rcq (operands[0]);
7362   int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1]));
7363   int ret = 0;
7364 
7365 #define ADDSI_OUTPUT1(FORMAT) do {\
7366   if (output_p) \
7367     output_asm_insn (FORMAT, operands);\
7368   return ret; \
7369 } while (0)
7370 #define ADDSI_OUTPUT(LIST) do {\
7371   if (output_p) \
7372     sprintf LIST;\
7373   ADDSI_OUTPUT1 (format);\
7374   return ret; \
7375 } while (0)
7376 
7377   /* First try to emit a 16 bit insn.  */
7378   ret = 2;
7379   if (!cond_p
7380       /* If we are actually about to output this insn, don't try a 16 bit
7381 	 variant if we already decided that we don't want that
7382 	 (I.e. we upsized this insn to align some following insn.)
7383 	 E.g. add_s r0,sp,70 is 16 bit, but add r0,sp,70 requires a LIMM -
7384 	 but add1 r0,sp,35 doesn't.  */
7385       && (!output_p || (get_attr_length (current_output_insn) & 2)))
7386     {
7387       if (short_p
7388 	  && (REG_P (operands[2])
7389 	      ? (match || satisfies_constraint_Rcq (operands[2]))
7390 	      : (unsigned) intval <= (match ? 127 : 7)))
7391 	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7392       if (short_0 && REG_P (operands[1]) && match2)
7393 	ADDSI_OUTPUT1 ("add%? %0,%2,%1");
7394       if ((short_0 || REGNO (operands[0]) == STACK_POINTER_REGNUM)
7395 	  && REGNO (operands[1]) == STACK_POINTER_REGNUM && !(intval & ~124))
7396 	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7397 
7398       if ((short_p && (unsigned) neg_intval <= (match ? 31 : 7))
7399 	  || (REGNO (operands[0]) == STACK_POINTER_REGNUM
7400 	      && match && !(neg_intval & ~124)))
7401 	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
7402     }
7403 
7404   /* Now try to emit a 32 bit insn without long immediate.  */
7405   ret = 4;
7406   if (!match && match2 && REG_P (operands[1]))
7407     ADDSI_OUTPUT1 ("add%? %0,%2,%1");
7408   if (match || !cond_p)
7409     {
7410       int limit = (match && !cond_p) ? 0x7ff : 0x3f;
7411       int range_factor = neg_intval & intval;
7412       int shift;
7413 
7414       if (intval == -1 << 31)
7415 	ADDSI_OUTPUT1 ("bxor%? %0,%1,31");
7416 
7417       /* If we can use a straight add / sub instead of a {add,sub}[123] of
7418 	 the same size, do so - the insn latency is lower.  */
7419       /* -0x800 is a 12-bit constant for add /add3 / sub / sub3, but
7420 	 0x800 is not.  */
7421       if ((intval >= 0 && intval <= limit)
7422 	       || (intval == -0x800 && limit == 0x7ff))
7423 	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7424       else if ((intval < 0 && neg_intval <= limit)
7425 	       || (intval == 0x800 && limit == 0x7ff))
7426 	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
7427       shift = range_factor >= 8 ? 3 : (range_factor >> 1);
7428       gcc_assert (shift == 0 || shift == 1 || shift == 2 || shift == 3);
7429       gcc_assert ((((1 << shift) - 1) & intval) == 0);
7430       if (((intval < 0 && intval != -0x4000)
7431 	   /* sub[123] is slower than add_s / sub, only use it if it
7432 	      avoids a long immediate.  */
7433 	   && neg_intval <= limit << shift)
7434 	  || (intval == 0x4000 && limit == 0x7ff))
7435 	ADDSI_OUTPUT ((format, "sub%d%%? %%0,%%1,%d",
7436 		       shift, neg_intval >> shift));
7437       else if ((intval >= 0 && intval <= limit << shift)
7438 	       || (intval == -0x4000 && limit == 0x7ff))
7439 	ADDSI_OUTPUT ((format, "add%d%%? %%0,%%1,%d", shift, intval >> shift));
7440     }
7441   /* Try to emit a 16 bit opcode with long immediate.  */
7442   ret = 6;
7443   if (short_p && match)
7444     ADDSI_OUTPUT1 ("add%? %0,%1,%S2");
7445 
7446   /* We have to use a 32 bit opcode, and with a long immediate.  */
7447   ret = 8;
7448   ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%S2");
7449 }
7450 
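/* A few illustrative outputs for hypothetical operands (short register
   constraints satisfied, no conditional execution):

       add_s  r0,r0,7        ; ret 2: 0 <= intval <= 127 and dest matches
       add    r0,r1,50       ; ret 4: constant fits a u6 field
       add    r0,r0,1000     ; ret 4: s12 form, dest matches source 1
       add3   r0,r0,512      ; ret 4: r0 += 512 << 3, avoids a LIMM
       add    r0,r1,0x12345  ; ret 8: needs a long immediate  */
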
7451 /* Emit code for a commutative_cond_exec instruction with OPERANDS.
7452    Return the length of the instruction.
7453    If OUTPUT_P is false, don't actually output the instruction, just return
7454    its length.  */
7455 int
7456 arc_output_commutative_cond_exec (rtx *operands, bool output_p)
7457 {
7458   enum rtx_code commutative_op = GET_CODE (operands[3]);
7459   const char *pat = NULL;
7460 
7461   /* Canonical rtl should not have a constant in the first operand position.  */
7462   gcc_assert (!CONSTANT_P (operands[1]));
7463 
7464   switch (commutative_op)
7465     {
7466       case AND:
7467 	if (satisfies_constraint_C1p (operands[2]))
7468 	  pat = "bmsk%? %0,%1,%Z2";
7469 	else if (satisfies_constraint_Ccp (operands[2]))
7470 	  pat = "bclr%? %0,%1,%M2";
7471 	else if (satisfies_constraint_CnL (operands[2]))
7472 	  pat = "bic%? %0,%1,%n2-1";
7473 	break;
7474       case IOR:
7475 	if (satisfies_constraint_C0p (operands[2]))
7476 	  pat = "bset%? %0,%1,%z2";
7477 	break;
7478       case XOR:
7479 	if (satisfies_constraint_C0p (operands[2]))
7480 	  pat = "bxor%? %0,%1,%z2";
7481 	break;
7482       case PLUS:
7483 	return arc_output_addsi (operands, true, output_p);
7484       default: break;
7485     }
7486   if (output_p)
7487     output_asm_insn (pat ? pat : "%O3.%d5 %0,%1,%2", operands);
7488   if (pat || REG_P (operands[2]) || satisfies_constraint_L (operands[2]))
7489     return 4;
7490   return 8;
7491 }
7492 
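/* Sketch of the special-cased immediates (operand 2 shown as the
   original AND/IOR/XOR constant):

       and  with 0x000000ff  ->  bmsk%? %0,%1,7    ; low-bits mask (C1p)
       and  with ~(1 << 8)   ->  bclr%? %0,%1,8    ; single clear bit (Ccp)
       ior  with (1 << 5)    ->  bset%? %0,%1,5    ; single set bit (C0p)
       xor  with (1 << 5)    ->  bxor%? %0,%1,5

   Each of these fits in four bytes; other constants fall back to the
   generic "%O3.%d5 %0,%1,%2" pattern, possibly with a long immediate.  */
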
7493 /* Helper function of arc_expand_movmem.  ADDR points to a chunk of memory.
7494    Emit code and return a potentially modified address such that offsets
7495    up to SIZE can be added to yield a legitimate address.
7496    If REUSE is set, ADDR is a register that may be modified.  */
7497 
7498 static rtx
7499 force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse)
7500 {
7501   rtx base = addr;
7502   rtx offs = const0_rtx;
7503 
7504   if (GET_CODE (base) == PLUS)
7505     {
7506       offs = XEXP (base, 1);
7507       base = XEXP (base, 0);
7508     }
7509   if (!REG_P (base)
7510       || (REGNO (base) != STACK_POINTER_REGNUM
7511 	  && REGNO_PTR_FRAME_P (REGNO (base)))
7512       || !CONST_INT_P (offs) || !SMALL_INT (INTVAL (offs))
7513       || !SMALL_INT (INTVAL (offs) + size))
7514     {
7515       if (reuse)
7516 	emit_insn (gen_add2_insn (addr, offs));
7517       else
7518 	addr = copy_to_mode_reg (Pmode, addr);
7519     }
7520   return addr;
7521 }
7522 
7523 /* Like move_by_pieces, but take account of load latency,
7524    and actual offset ranges.
7525    Return true on success.  */
7526 
7527 bool
7528 arc_expand_movmem (rtx *operands)
7529 {
7530   rtx dst = operands[0];
7531   rtx src = operands[1];
7532   rtx dst_addr, src_addr;
7533   HOST_WIDE_INT size;
7534   int align = INTVAL (operands[3]);
7535   unsigned n_pieces;
7536   int piece = align;
7537   rtx store[2];
7538   rtx tmpx[2];
7539   int i;
7540 
7541   if (!CONST_INT_P (operands[2]))
7542     return false;
7543   size = INTVAL (operands[2]);
7544   /* move_by_pieces_ninsns is static, so we can't use it.  */
7545   if (align >= 4)
7546     n_pieces = (size + 2) / 4U + (size & 1);
7547   else if (align == 2)
7548     n_pieces = (size + 1) / 2U;
7549   else
7550     n_pieces = size;
7551   if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
7552     return false;
7553   if (piece > 4)
7554     piece = 4;
7555   dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
7556   src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
7557   store[0] = store[1] = NULL_RTX;
7558   tmpx[0] = tmpx[1] = NULL_RTX;
7559   for (i = 0; size > 0; i ^= 1, size -= piece)
7560     {
7561       rtx tmp;
7562       machine_mode mode;
7563 
7564       if (piece > size)
7565 	piece = size & -size;
7566       mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT);
7567       /* If we don't re-use temporaries, the scheduler gets carried away,
7568 	 and the register pressure gets unnecessarily high.  */
7569       if (0 && tmpx[i] && GET_MODE (tmpx[i]) == mode)
7570 	tmp = tmpx[i];
7571       else
7572 	tmpx[i] = tmp = gen_reg_rtx (mode);
7573       dst_addr = force_offsettable (dst_addr, piece, 1);
7574       src_addr = force_offsettable (src_addr, piece, 1);
7575       if (store[i])
7576 	emit_insn (store[i]);
7577       emit_move_insn (tmp, change_address (src, mode, src_addr));
7578       store[i] = gen_move_insn (change_address (dst, mode, dst_addr), tmp);
7579       dst_addr = plus_constant (Pmode, dst_addr, piece);
7580       src_addr = plus_constant (Pmode, src_addr, piece);
7581     }
7582   if (store[i])
7583     emit_insn (store[i]);
7584   if (store[i^1])
7585     emit_insn (store[i^1]);
7586   return true;
7587 }
7588 
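/* For instance, a 6-byte copy with 4-byte alignment is estimated at
   (6 + 2) / 4 + (6 & 1) = 2 pieces and expands to one SImode and one
   HImode load/store pair; the store for each piece is delayed by one
   iteration so that a load result is never used by the immediately
   following insn.  */
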
7589 /* Prepare operands for move in MODE.  Return true iff the move has
7590    been emitted.  */
7591 
7592 bool
7593 prepare_move_operands (rtx *operands, machine_mode mode)
7594 {
7595   /* We used to do this only for MODE_INT modes, but addresses to floating
7596      point variables may well be in the small data section.  */
7597   if (1)
7598     {
7599       if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[0], Pmode))
7600 	operands[0] = arc_rewrite_small_data (operands[0]);
7601       else if (mode == SImode && flag_pic && SYMBOLIC_CONST (operands[1]))
7602 	{
7603 	  emit_pic_move (operands, SImode);
7604 
7605 	  /* Disable any REG_EQUALs associated with the symref
7606 	     otherwise the optimization pass undoes the work done
7607 	     here and references the variable directly.  */
7608 	}
7609       else if (GET_CODE (operands[0]) != MEM
7610 	       && !TARGET_NO_SDATA_SET
7611 	       && small_data_pattern (operands[1], Pmode))
7612 	{
7613 	  /* This is to take care of address calculations involving sdata
7614 	     variables.  */
7615 	  operands[1] = arc_rewrite_small_data (operands[1]);
7616 
7617 	  emit_insn (gen_rtx_SET (mode, operands[0], operands[1]));
7618 	  /* ??? This note is useless, since it only restates the set itself.
7619 	     We should rather use the original SYMBOL_REF.  However, there is
7620 	     the problem that we are lying to the compiler about these
7621 	     SYMBOL_REFs to start with.  symbol@sda should be encoded specially
7622 	     so that we can tell it apart from an actual symbol.  */
7623 	  set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
7624 
7625 	  /* Take care of the REG_EQUAL note that will be attached to mark the
7626 	     output reg equal to the initial symbol_ref after this code is
7627 	     executed.  */
7628 	  emit_move_insn (operands[0], operands[0]);
7629 	  return true;
7630 	}
7631     }
7632 
7633   if (MEM_P (operands[0])
7634       && !(reload_in_progress || reload_completed))
7635     {
7636       operands[1] = force_reg (mode, operands[1]);
7637       if (!move_dest_operand (operands[0], mode))
7638 	{
7639 	  rtx addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
7640 	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
7641 	     except that we can't use that function because it is static.  */
7642 	  rtx pat = change_address (operands[0], mode, addr);
7643 	  MEM_COPY_ATTRIBUTES (pat, operands[0]);
7644 	  operands[0] = pat;
7645 	}
7646       if (!cse_not_expected)
7647 	{
7648 	  rtx pat = XEXP (operands[0], 0);
7649 
7650 	  pat = arc_legitimize_address_0 (pat, pat, mode);
7651 	  if (pat)
7652 	    {
7653 	      pat = change_address (operands[0], mode, pat);
7654 	      MEM_COPY_ATTRIBUTES (pat, operands[0]);
7655 	      operands[0] = pat;
7656 	    }
7657 	}
7658     }
7659 
7660   if (MEM_P (operands[1]) && !cse_not_expected)
7661     {
7662       rtx pat = XEXP (operands[1], 0);
7663 
7664       pat = arc_legitimize_address_0 (pat, pat, mode);
7665       if (pat)
7666 	{
7667 	  pat = change_address (operands[1], mode, pat);
7668 	  MEM_COPY_ATTRIBUTES (pat, operands[1]);
7669 	  operands[1] = pat;
7670 	}
7671     }
7672 
7673   return false;
7674 }
7675 
7676 /* Prepare OPERANDS for an extension using CODE to OMODE.
7677    Return true iff the move has been emitted.  */
7678 
7679 bool
7680 prepare_extend_operands (rtx *operands, enum rtx_code code,
7681 			 machine_mode omode)
7682 {
7683   if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode))
7684     {
7685       /* This is to take care of address calculations involving sdata
7686 	 variables.  */
7687       operands[1]
7688 	= gen_rtx_fmt_e (code, omode, arc_rewrite_small_data (operands[1]));
7689       emit_insn (gen_rtx_SET (omode, operands[0], operands[1]));
7690       set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
7691 
7692       /* Take care of the REG_EQUAL note that will be attached to mark the
7693 	 output reg equal to the initial extension after this code is
7694 	 executed.  */
7695       emit_move_insn (operands[0], operands[0]);
7696       return true;
7697     }
7698   return false;
7699 }
7700 
7701 /* Output a library call to a function called FNAME that has been arranged
7702    to be local to any dso.  */
7703 
7704 const char *
7705 arc_output_libcall (const char *fname)
7706 {
7707   unsigned len = strlen (fname);
7708   static char buf[64];
7709 
7710   gcc_assert (len < sizeof buf - 35);
7711   if (TARGET_LONG_CALLS_SET
7712      || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ()))
7713     {
7714       if (flag_pic)
7715 	sprintf (buf, "add r12,pcl,@%s-(.&-4)\n\tjl%%!%%* [r12]", fname);
7716       else
7717 	sprintf (buf, "jl%%! @%s", fname);
7718     }
7719   else
7720     sprintf (buf, "bl%%!%%* @%s", fname);
7721   return buf;
7722 }
7723 
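/* E.g. for a hypothetical FNAME of "__mulsi3" this returns
   "bl%!%* @__mulsi3" by default, "jl%! @__mulsi3" for -mlong-calls,
   and the pcl-relative "add r12,pcl,..." / "jl%!%* [r12]" pair for
   long PIC calls.  */
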
7724 /* Return the SImode highpart of the DImode value IN.  */
7725 
7726 rtx
7727 disi_highpart (rtx in)
7728 {
7729   return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4);
7730 }
7731 
7732 /* Return length adjustment for INSN.
7733    For ARC600:
7734    A write to a core reg greater or equal to 32 must not be immediately
7735    followed by a use.  Anticipate the length requirement to insert a nop
7736    between PRED and SUCC to prevent a hazard.  */
7737 
7738 static int
7739 arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
7740 {
7741   if (!TARGET_ARC600)
7742     return 0;
7743   /* If SUCC is a doloop_end_i with a preceding label, we must output a nop
7744      in front of SUCC anyway, so there will be separation between PRED and
7745      SUCC.  */
7746   if (recog_memoized (succ) == CODE_FOR_doloop_end_i
7747       && LABEL_P (prev_nonnote_insn (succ)))
7748     return 0;
7749   if (recog_memoized (succ) == CODE_FOR_doloop_begin_i)
7750     return 0;
7751   if (GET_CODE (PATTERN (pred)) == SEQUENCE)
7752     pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
7753   if (GET_CODE (PATTERN (succ)) == SEQUENCE)
7754     succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
7755   if (recog_memoized (pred) == CODE_FOR_mulsi_600
7756       || recog_memoized (pred) == CODE_FOR_umul_600
7757       || recog_memoized (pred) == CODE_FOR_mac_600
7758       || recog_memoized (pred) == CODE_FOR_mul64_600
7759       || recog_memoized (pred) == CODE_FOR_mac64_600
7760       || recog_memoized (pred) == CODE_FOR_umul64_600
7761       || recog_memoized (pred) == CODE_FOR_umac64_600)
7762     return 0;
7763   subrtx_iterator::array_type array;
7764   FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
7765     {
7766       const_rtx x = *iter;
7767       switch (GET_CODE (x))
7768 	{
7769 	case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
7770 	  break;
7771 	default:
7772 	  /* This is also fine for PRE/POST_MODIFY, because they
7773 	     contain a SET.  */
7774 	  continue;
7775 	}
7776       rtx dest = XEXP (x, 0);
7777       /* Check if this sets an extension register.  N.B. we use 61 for the
7778 	 condition codes, which is definitely not an extension register.  */
7779       if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
7780 	  /* Check if the same register is used by the PAT.  */
7781 	  && (refers_to_regno_p
7782 	      (REGNO (dest),
7783 	       REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
7784 	       PATTERN (succ), 0)))
7785 	return 4;
7786     }
7787   return 0;
7788 }
7789 
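/* E.g. (hypothetical insns, r40 standing in for any extension core
   register, 32 <= regno < 61):

       mov  r40,r1      ; PRED writes an extension register
       add  r2,r40,r3   ; SUCC reads it back immediately

   makes this function return 4, so that a nop can be scheduled between
   the two instructions.  */
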
7790 /* For ARC600:
7791    A write to a core reg greater or equal to 32 must not be immediately
7792    followed by a use.  Anticipate the length requirement to insert a nop
7793    between PRED and SUCC to prevent a hazard.  */
7794 
7795 int
7796 arc_hazard (rtx_insn *pred, rtx_insn *succ)
7797 {
7798   if (!TARGET_ARC600)
7799     return 0;
7800   if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
7801     return 0;
7802   /* We might have a CALL to a non-returning function before a loop end.
7803      ??? Although the manual says that's OK (the target is outside the loop,
7804      and the loop counter unused there), the assembler barfs on this, so we
7805      must insert a nop before such a call too.  */
7806   if (recog_memoized (succ) == CODE_FOR_doloop_end_i
7807       && (JUMP_P (pred) || CALL_P (pred)
7808 	  || GET_CODE (PATTERN (pred)) == SEQUENCE))
7809     return 4;
7810   return arc600_corereg_hazard (pred, succ);
7811 }
7812 
7813 /* Return length adjustment for INSN.  */
7814 
7815 int
7816 arc_adjust_insn_length (rtx_insn *insn, int len, bool)
7817 {
7818   if (!INSN_P (insn))
7819     return len;
7820   /* We already handle sequences by ignoring the delay sequence flag.  */
7821   if (GET_CODE (PATTERN (insn)) == SEQUENCE)
7822     return len;
7823 
7824   /* It is impossible to jump to the very end of a Zero-Overhead Loop, as
7825      the ZOL mechanism only triggers when advancing to the end address,
7826      so if there's a label at the end of a ZOL, we need to insert a nop.
7827      The ARC600 ZOL also has extra restrictions on jumps at the end of a
7828      loop.  */
7829   if (recog_memoized (insn) == CODE_FOR_doloop_end_i)
7830     {
7831       rtx_insn *prev = prev_nonnote_insn (insn);
7832 
7833       return ((LABEL_P (prev)
7834 	       || (TARGET_ARC600
7835 		   && (JUMP_P (prev)
7836 		       || CALL_P (prev) /* Could be a noreturn call.  */
7837 		       || (NONJUMP_INSN_P (prev)
7838 			   && GET_CODE (PATTERN (prev)) == SEQUENCE))))
7839 	      ? len + 4 : len);
7840     }
7841 
7842   /* Check for a return with only one preceding insn since function
7843      start / call.  */
7844   if (TARGET_PAD_RETURN
7845       && JUMP_P (insn)
7846       && GET_CODE (PATTERN (insn)) != ADDR_VEC
7847       && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7848       && get_attr_type (insn) == TYPE_RETURN)
7849     {
7850       rtx_insn *prev = prev_active_insn (insn);
7851 
7852       if (!prev || !(prev = prev_active_insn (prev))
7853 	  || ((NONJUMP_INSN_P (prev)
7854 	       && GET_CODE (PATTERN (prev)) == SEQUENCE)
7855 	      ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
7856 			   NON_SIBCALL)
7857 	      : CALL_ATTR (prev, NON_SIBCALL)))
7858 	return len + 4;
7859     }
7860   if (TARGET_ARC600)
7861     {
7862       rtx_insn *succ = next_real_insn (insn);
7863 
7864       /* On the ARC600, a write to an extension register must be separated
7865 	 from a read.  */
7866       if (succ && INSN_P (succ))
7867 	len += arc600_corereg_hazard (insn, succ);
7868     }
7869 
7870   /* Restore extracted operands - otherwise splitters like the addsi3_mixed one
7871      can go awry.  */
7872   extract_constrain_insn_cached (insn);
7873 
7874   return len;
7875 }
7876 
7877 /* Values for length_sensitive.  */
7878 enum
7879 {
7880   ARC_LS_NONE, // Jcc
7881   ARC_LS_25, // 25 bit offset, B
7882   ARC_LS_21, // 21 bit offset, Bcc
7883   ARC_LS_U13, // 13 bit unsigned offset, LP
7884   ARC_LS_10, // 10 bit offset, B_s, Beq_s, Bne_s
7885   ARC_LS_9,  //  9 bit offset, BRcc
7886   ARC_LS_8,  //  8 bit offset, BRcc_s
7887   ARC_LS_U7, //  7 bit unsigned offset, LPcc
7888   ARC_LS_7   //  7 bit offset, Bcc_s
7889 };
7890 
7891 /* While the infrastructure patch is waiting for review, duplicate the
7892    struct definitions, to allow this file to compile.  */
7893 #if 1
7894 typedef struct
7895 {
7896   unsigned align_set;
7897   /* Cost as a branch / call target or call return address.  */
7898   int target_cost;
7899   int fallthrough_cost;
7900   int branch_cost;
7901   int length;
7902   /* 0 for not length sensitive, 1 for largest offset range,
7903      2 for next smaller etc.  */
7904   unsigned length_sensitive : 8;
7905   bool enabled;
7906 } insn_length_variant_t;
7907 
7908 typedef struct insn_length_parameters_s
7909 {
7910   int align_unit_log;
7911   int align_base_log;
7912   int max_variants;
7913   int (*get_variants) (rtx_insn *, int, bool, bool, insn_length_variant_t *);
7914 } insn_length_parameters_t;
7915 
7916 static void
7917 arc_insn_length_parameters (insn_length_parameters_t *ilp) ATTRIBUTE_UNUSED;
7918 #endif
7919 
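/* Compute the insn length variants for INSN, placing them in ILV.
   As used below, align_set appears to be a mask of the 2-byte-aligned
   addresses (modulo 4) a variant may be placed at: 1 for 4-byte-aligned
   only, 2 for 2-byte-misaligned only, 3 for either.  Splitting a variant
   into an align_set 1 / align_set 2 pair lets shorten_branches price the
   aligned and unaligned placements separately.  (This description is
   inferred from the uses below, not from the pending infrastructure
   patch.)  */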
7920 static int
7921 arc_get_insn_variants (rtx_insn *insn, int len, bool, bool target_p,
7922 		       insn_length_variant_t *ilv)
7923 {
7924   if (!NONDEBUG_INSN_P (insn))
7925     return 0;
7926   enum attr_type type;
7927   /* shorten_branches doesn't take optimize_size into account yet for the
7928      get_variants mechanism, so turn this off for now.  */
7929   if (optimize_size)
7930     return 0;
7931   if (rtx_sequence *pat = dyn_cast <rtx_sequence *> (PATTERN (insn)))
7932     {
7933       /* The interaction of a short delay slot insn with a short branch is
7934 	 too weird for shorten_branches to piece together, so describe the
7935 	 entire SEQUENCE.  */
7936       rtx_insn *inner;
7937       if (TARGET_UPSIZE_DBR
7938 	  && get_attr_length (pat->insn (1)) <= 2
7939 	  && (((type = get_attr_type (inner = pat->insn (0)))
7940 	       == TYPE_UNCOND_BRANCH)
7941 	      || type == TYPE_BRANCH)
7942 	  && get_attr_delay_slot_filled (inner) == DELAY_SLOT_FILLED_YES)
7943 	{
7944 	  int n_variants
7945 	    = arc_get_insn_variants (inner, get_attr_length (inner), true,
7946 				     target_p, ilv+1);
7947 	  /* The short variant gets split into a higher-cost aligned
7948 	     and a lower-cost unaligned variant.  */
7949 	  gcc_assert (n_variants);
7950 	  gcc_assert (ilv[1].length_sensitive == ARC_LS_7
7951 		      || ilv[1].length_sensitive == ARC_LS_10);
7952 	  gcc_assert (ilv[1].align_set == 3);
7953 	  ilv[0] = ilv[1];
7954 	  ilv[0].align_set = 1;
7955 	  ilv[0].branch_cost += 1;
7956 	  ilv[1].align_set = 2;
7957 	  n_variants++;
7958 	  for (int i = 0; i < n_variants; i++)
7959 	    ilv[i].length += 2;
7960 	  /* In case an instruction with aligned size is wanted, and
7961 	     the short variants are unavailable / too expensive, add
7962 	     versions of long branch + long delay slot.  */
7963 	  for (int i = 2, end = n_variants; i < end; i++, n_variants++)
7964 	    {
7965 	      ilv[n_variants] = ilv[i];
7966 	      ilv[n_variants].length += 2;
7967 	    }
7968 	  return n_variants;
7969 	}
7970       return 0;
7971     }
7972   insn_length_variant_t *first_ilv = ilv;
7973   type = get_attr_type (insn);
7974   bool delay_filled
7975     = (get_attr_delay_slot_filled (insn) == DELAY_SLOT_FILLED_YES);
7976   int branch_align_cost = delay_filled ? 0 : 1;
7977   int branch_unalign_cost = delay_filled ? 0 : TARGET_UNALIGN_BRANCH ? 0 : 1;
7978   /* If the previous instruction is an sfunc call, this insn is always
7979      a target, even though the middle-end is unaware of this.  */
7980   bool force_target = false;
7981   rtx_insn *prev = prev_active_insn (insn);
7982   if (prev && arc_next_active_insn (prev, 0) == insn
7983       && ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
7984 	  ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
7985 		       NON_SIBCALL)
7986 	  : (CALL_ATTR (prev, NON_SIBCALL)
7987 	     && NEXT_INSN (PREV_INSN (prev)) == prev)))
7988     force_target = true;
7989 
7990   switch (type)
7991     {
7992     case TYPE_BRCC:
7993       /* Short BRCC only comes in a no-delay-slot version, and without limm.  */
7994       if (!delay_filled)
7995 	{
7996 	  ilv->align_set = 3;
7997 	  ilv->length = 2;
7998 	  ilv->branch_cost = 1;
7999 	  ilv->enabled = (len == 2);
8000 	  ilv->length_sensitive = ARC_LS_8;
8001 	  ilv++;
8002 	}
8003       /* Fall through.  */
8004     case TYPE_BRCC_NO_DELAY_SLOT:
8005       /* doloop_fallback* patterns are TYPE_BRCC_NO_DELAY_SLOT for
8006 	 (delay slot) scheduling purposes, but they are longer.  */
8007       if (GET_CODE (PATTERN (insn)) == PARALLEL
8008 	  && GET_CODE (XVECEXP (PATTERN (insn), 0, 1)) == SET)
8009 	return 0;
8010       /* Standard BRCC: 4 bytes, or 8 bytes with limm.  */
8011       ilv->length = ((type == TYPE_BRCC) ? 4 : 8);
8012       ilv->align_set = 3;
8013       ilv->branch_cost = branch_align_cost;
8014       ilv->enabled = (len <= ilv->length);
8015       ilv->length_sensitive = ARC_LS_9;
8016       if ((target_p || force_target)
8017 	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
8018 	{
8019 	  ilv[1] = *ilv;
8020 	  ilv->align_set = 1;
8021 	  ilv++;
8022 	  ilv->align_set = 2;
8023 	  ilv->target_cost = 1;
8024 	  ilv->branch_cost = branch_unalign_cost;
8025 	}
8026       ilv++;
8027 
8028       rtx op, op0;
8029       op = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0);
8030       op0 = XEXP (op, 0);
8031 
8032       if (GET_CODE (op0) == ZERO_EXTRACT
8033 	  && satisfies_constraint_L (XEXP (op0, 2)))
8034 	op0 = XEXP (op0, 0);
8035       if (satisfies_constraint_Rcq (op0))
8036 	{
8037 	  ilv->length = ((type == TYPE_BRCC) ? 6 : 10);
8038 	  ilv->align_set = 3;
8039 	  ilv->branch_cost = 1 + branch_align_cost;
8040 	  ilv->fallthrough_cost = 1;
8041 	  ilv->enabled = true;
8042 	  ilv->length_sensitive = ARC_LS_21;
8043 	  if (!delay_filled && TARGET_UNALIGN_BRANCH)
8044 	    {
8045 	      ilv[1] = *ilv;
8046 	      ilv->align_set = 1;
8047 	      ilv++;
8048 	      ilv->align_set = 2;
8049 	      ilv->branch_cost = 1 + branch_unalign_cost;
8050 	    }
8051 	  ilv++;
8052 	}
8053       ilv->length = ((type == TYPE_BRCC) ? 8 : 12);
8054       ilv->align_set = 3;
8055       ilv->branch_cost = 1 + branch_align_cost;
8056       ilv->fallthrough_cost = 1;
8057       ilv->enabled = true;
8058       ilv->length_sensitive = ARC_LS_21;
8059       if ((target_p || force_target)
8060 	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
8061 	{
8062 	  ilv[1] = *ilv;
8063 	  ilv->align_set = 1;
8064 	  ilv++;
8065 	  ilv->align_set = 2;
8066 	  ilv->target_cost = 1;
8067 	  ilv->branch_cost = 1 + branch_unalign_cost;
8068 	}
8069       ilv++;
8070       break;
8071 
8072     case TYPE_SFUNC:
8073       ilv->length = 12;
8074       goto do_call;
8075     case TYPE_CALL_NO_DELAY_SLOT:
8076       ilv->length = 8;
8077       goto do_call;
8078     case TYPE_CALL:
8079       ilv->length = 4;
8080       ilv->length_sensitive
8081 	= GET_CODE (PATTERN (insn)) == COND_EXEC ? ARC_LS_21 : ARC_LS_25;
8082     do_call:
8083       ilv->align_set = 3;
8084       ilv->fallthrough_cost = branch_align_cost;
8085       ilv->enabled = true;
8086       if ((target_p || force_target)
8087 	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
8088 	{
8089 	  ilv[1] = *ilv;
8090 	  ilv->align_set = 1;
8091 	  ilv++;
8092 	  ilv->align_set = 2;
8093 	  ilv->target_cost = 1;
8094 	  ilv->fallthrough_cost = branch_unalign_cost;
8095 	}
8096       ilv++;
8097       break;
8098     case TYPE_UNCOND_BRANCH:
8099       /* Strictly speaking, this should be ARC_LS_10 for equality comparisons,
8100 	 but that makes no difference at the moment.  */
8101       ilv->length_sensitive = ARC_LS_7;
8102       ilv[1].length_sensitive = ARC_LS_25;
8103       goto do_branch;
8104     case TYPE_BRANCH:
8105       ilv->length_sensitive = ARC_LS_10;
8106       ilv[1].length_sensitive = ARC_LS_21;
8107     do_branch:
8108       ilv->align_set = 3;
8109       ilv->length = 2;
8110       ilv->branch_cost = branch_align_cost;
8111       ilv->enabled = (len == ilv->length);
8112       ilv++;
8113       ilv->length = 4;
8114       ilv->align_set = 3;
8115       ilv->branch_cost = branch_align_cost;
8116       ilv->enabled = true;
8117       if ((target_p || force_target)
8118 	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
8119 	{
8120 	  ilv[1] = *ilv;
8121 	  ilv->align_set = 1;
8122 	  ilv++;
8123 	  ilv->align_set = 2;
8124 	  ilv->target_cost = 1;
8125 	  ilv->branch_cost = branch_unalign_cost;
8126 	}
8127       ilv++;
8128       break;
8129     case TYPE_JUMP:
8130       return 0;
8131     default:
8132       /* For every short insn, there is generally also a long insn.
8133 	 trap_s is an exception.  */
8134       if ((len & 2) == 0 || recog_memoized (insn) == CODE_FOR_trap_s)
8135 	return 0;
8136       ilv->align_set = 3;
8137       ilv->length = len;
8138       ilv->enabled = 1;
8139       ilv++;
8140       ilv->align_set = 3;
8141       ilv->length = len + 2;
8142       ilv->enabled = 1;
8143       if (target_p || force_target)
8144 	{
8145 	  ilv[1] = *ilv;
8146 	  ilv->align_set = 1;
8147 	  ilv++;
8148 	  ilv->align_set = 2;
8149 	  ilv->target_cost = 1;
8150 	}
8151       ilv++;
8152     }
8153   /* If the previous instruction is an sfunc call, this insn is always
8154      a target, even though the middle-end is unaware of this.
8155      Therefore, if we have a call predecessor, transfer the target cost
8156      to the fallthrough and branch costs.  */
8157   if (force_target)
8158     {
8159       for (insn_length_variant_t *p = first_ilv; p < ilv; p++)
8160 	{
8161 	  p->fallthrough_cost += p->target_cost;
8162 	  p->branch_cost += p->target_cost;
8163 	  p->target_cost = 0;
8164 	}
8165     }
8166 
8167   return ilv - first_ilv;
8168 }
8169 
8170 static void
8171 arc_insn_length_parameters (insn_length_parameters_t *ilp)
8172 {
8173   ilp->align_unit_log = 1;
8174   ilp->align_base_log = 1;
8175   ilp->max_variants = 7;
8176   ilp->get_variants = arc_get_insn_variants;
8177 }
8178 
8179 /* Return a copy of COND from *STATEP, inverted if that is indicated by the
8180    CC field of *STATEP.  */
8181 
8182 static rtx
8183 arc_get_ccfsm_cond (struct arc_ccfsm *statep, bool reverse)
8184 {
8185   rtx cond = statep->cond;
8186   int raw_cc = get_arc_condition_code (cond);
8187   if (reverse)
8188     raw_cc = ARC_INVERSE_CONDITION_CODE (raw_cc);
8189 
8190   if (statep->cc == raw_cc)
8191     return copy_rtx (cond);
8192 
8193   gcc_assert (ARC_INVERSE_CONDITION_CODE (raw_cc) == statep->cc);
8194 
8195   machine_mode ccm = GET_MODE (XEXP (cond, 0));
8196   enum rtx_code code = reverse_condition (GET_CODE (cond));
8197   if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
8198     code = reverse_condition_maybe_unordered (GET_CODE (cond));
8199 
8200   return gen_rtx_fmt_ee (code, GET_MODE (cond),
8201 			 copy_rtx (XEXP (cond, 0)), copy_rtx (XEXP (cond, 1)));
8202 }
8203 
8204 /* Return version of PAT conditionalized with COND, which is part of INSN.
8205    ANNULLED indicates if INSN is an annulled delay-slot insn.
8206    Register further changes if necessary.  */
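/* E.g. (an illustrative sketch), given COND (eq (reg CC_REG) (const_int 0)),
     (set (reg:SI 0) (plus:SI (reg:SI 1) (reg:SI 2)))
   becomes
     (cond_exec (eq (reg CC_REG) (const_int 0))
       (set (reg:SI 0) (plus:SI (reg:SI 1) (reg:SI 2))))
   which can then be output as a predicated add.eq insn.  */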
8207 static rtx
8208 conditionalize_nonjump (rtx pat, rtx cond, rtx insn, bool annulled)
8209 {
8210   /* For commutative operators, we generally prefer to have
8211      the first source match the destination.  */
8212   if (GET_CODE (pat) == SET)
8213     {
8214       rtx src = SET_SRC (pat);
8215 
8216       if (COMMUTATIVE_P (src))
8217 	{
8218 	  rtx src0 = XEXP (src, 0);
8219 	  rtx src1 = XEXP (src, 1);
8220 	  rtx dst = SET_DEST (pat);
8221 
8222 	  if (rtx_equal_p (src1, dst) && !rtx_equal_p (src0, dst)
8223 	      /* Leave add_n alone - the canonical form is to
8224 		 have the complex summand first.  */
8225 	      && REG_P (src0))
8226 	    pat = gen_rtx_SET (VOIDmode, dst,
8227 			       gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src),
8228 					       src1, src0));
8229 	}
8230     }
8231 
8232   /* dwarf2out.c:dwarf2out_frame_debug_expr doesn't know
8233      what to do with COND_EXEC.  */
8234   if (RTX_FRAME_RELATED_P (insn))
8235     {
8236       /* If this is the delay slot insn of an annulled branch,
8237 	 dwarf2out.c:scan_trace understands the annulling semantics
8238 	 without the COND_EXEC.  */
8239       gcc_assert (annulled);
8240       rtx note = alloc_reg_note (REG_FRAME_RELATED_EXPR, pat,
8241 				 REG_NOTES (insn));
8242       validate_change (insn, &REG_NOTES (insn), note, 1);
8243     }
8244   pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
8245   return pat;
8246 }
8247 
8248 /* Use the ccfsm machinery to do if conversion.  */
8249 
8250 static unsigned
8251 arc_ifcvt (void)
8252 {
8253   struct arc_ccfsm *statep = &cfun->machine->ccfsm_current;
8254   basic_block merge_bb = 0;
8255 
8256   memset (statep, 0, sizeof *statep);
8257   for (rtx_insn *insn = get_insns (); insn; insn = next_insn (insn))
8258     {
8259       arc_ccfsm_advance (insn, statep);
8260 
8261       switch (statep->state)
8262 	{
8263 	case 0:
8264 	  if (JUMP_P (insn))
8265 	    merge_bb = 0;
8266 	  break;
8267 	case 1: case 2:
8268 	  {
8269 	    /* Deleted branch.  */
8270 	    gcc_assert (!merge_bb);
8271 	    merge_bb = BLOCK_FOR_INSN (insn);
8272 	    basic_block succ_bb
8273 	      = BLOCK_FOR_INSN (NEXT_INSN (NEXT_INSN (PREV_INSN (insn))));
8274 	    arc_ccfsm_post_advance (insn, statep);
8275 	    gcc_assert (!IN_RANGE (statep->state, 1, 2));
8276 	    rtx_insn *seq = NEXT_INSN (PREV_INSN (insn));
8277 	    if (seq != insn)
8278 	      {
8279 		rtx slot = XVECEXP (PATTERN (seq), 0, 1);
8280 		rtx pat = PATTERN (slot);
8281 		if (INSN_ANNULLED_BRANCH_P (insn))
8282 		  {
8283 		    rtx cond
8284 		      = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (slot));
8285 		    pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
8286 		  }
8287 		if (!validate_change (seq, &PATTERN (seq), pat, 0))
8288 		  gcc_unreachable ();
8289 		PUT_CODE (slot, NOTE);
8290 		NOTE_KIND (slot) = NOTE_INSN_DELETED;
8291 		if (merge_bb && succ_bb)
8292 		  merge_blocks (merge_bb, succ_bb);
8293 	      }
8294 	    else if (merge_bb && succ_bb)
8295 	      {
8296 		set_insn_deleted (insn);
8297 		merge_blocks (merge_bb, succ_bb);
8298 	      }
8299 	    else
8300 	      {
8301 		PUT_CODE (insn, NOTE);
8302 		NOTE_KIND (insn) = NOTE_INSN_DELETED;
8303 	      }
8304 	    continue;
8305 	  }
8306 	case 3:
8307 	  if (LABEL_P (insn)
8308 	      && statep->target_label == CODE_LABEL_NUMBER (insn))
8309 	    {
8310 	      arc_ccfsm_post_advance (insn, statep);
8311 	      basic_block succ_bb = BLOCK_FOR_INSN (insn);
8312 	      if (merge_bb && succ_bb)
8313 		merge_blocks (merge_bb, succ_bb);
8314 	      else if (--LABEL_NUSES (insn) == 0)
8315 		{
8316 		  const char *name = LABEL_NAME (insn);
8317 		  PUT_CODE (insn, NOTE);
8318 		  NOTE_KIND (insn) = NOTE_INSN_DELETED_LABEL;
8319 		  NOTE_DELETED_LABEL_NAME (insn) = name;
8320 		}
8321 	      merge_bb = 0;
8322 	      continue;
8323 	    }
8324 	  /* Fall through.  */
8325 	case 4: case 5:
8326 	  if (!NONDEBUG_INSN_P (insn))
8327 	    break;
8328 
8329 	  /* Conditionalized insn.  */
8330 
8331 	  rtx_insn *prev, *pprev;
8332 	  rtx *patp, pat, cond;
8333 	  bool annulled; annulled = false;
8334 
8335 	  /* If this is a delay slot insn in a non-annulled branch,
8336 	     don't conditionalize it.  N.B., this should be fine for
8337 	     conditional return too.  However, don't do this for
8338 	     unconditional branches, as these would be encountered when
8339 	     processing an 'else' part.  */
8340 	  prev = PREV_INSN (insn);
8341 	  pprev = PREV_INSN (prev);
8342 	  if (pprev && NEXT_INSN (NEXT_INSN (pprev)) == NEXT_INSN (insn)
8343 	      && JUMP_P (prev) && get_attr_cond (prev) == COND_USE)
8344 	    {
8345 	      if (!INSN_ANNULLED_BRANCH_P (prev))
8346 		break;
8347 	      annulled = true;
8348 	    }
8349 
8350 	  patp = &PATTERN (insn);
8351 	  pat = *patp;
8352 	  cond = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (insn));
8353 	  if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8354 	    {
8355 	      /* ??? don't conditionalize if all side effects are dead
8356 		 in the not-execute case.  */
8357 
8358 	      pat = conditionalize_nonjump (pat, cond, insn, annulled);
8359 	    }
8360 	  else if (simplejump_p (insn))
8361 	    {
8362 	      patp = &SET_SRC (pat);
8363 	      pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, *patp, pc_rtx);
8364 	    }
8365 	  else if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
8366 	    {
8367 	      pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, pat, pc_rtx);
8368 	      pat = gen_rtx_SET (VOIDmode, pc_rtx, pat);
8369 	    }
8370 	  else
8371 	    gcc_unreachable ();
8372 	  validate_change (insn, patp, pat, 1);
8373 	  if (!apply_change_group ())
8374 	    gcc_unreachable ();
8375 	  if (JUMP_P (insn))
8376 	    {
8377 	      rtx_insn *next = next_nonnote_insn (insn);
8378 	      if (GET_CODE (next) == BARRIER)
8379 		delete_insn (next);
8380 	      if (statep->state == 3)
8381 		continue;
8382 	    }
8383 	  break;
8384 	default:
8385 	  gcc_unreachable ();
8386 	}
8387       arc_ccfsm_post_advance (insn, statep);
8388     }
8389   return 0;
8390 }
8391 
8392 /* Find annulled delay insns and convert them to use the appropriate predicate.
8393    This allows branch shortening to size up these insns properly.  */
8394 
8395 static unsigned
8396 arc_predicate_delay_insns (void)
8397 {
8398   for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8399     {
8400       rtx pat, jump, dlay, src, cond, *patp;
8401       int reverse;
8402 
8403       if (!NONJUMP_INSN_P (insn)
8404 	  || GET_CODE (pat = PATTERN (insn)) != SEQUENCE)
8405 	continue;
8406       jump = XVECEXP (pat, 0, 0);
8407       dlay = XVECEXP (pat, 0, 1);
8408       if (!JUMP_P (jump) || !INSN_ANNULLED_BRANCH_P (jump))
8409 	continue;
8410       /* If the branch insn does the annulling, leave the delay insn alone.  */
8411       if (!TARGET_AT_DBR_CONDEXEC && !INSN_FROM_TARGET_P (dlay))
8412 	continue;
8413       /* ??? Could also leave DLAY un-conditionalized if its target is dead
8414 	 on the other path.  */
8415       gcc_assert (GET_CODE (PATTERN (jump)) == SET);
8416       gcc_assert (SET_DEST (PATTERN (jump)) == pc_rtx);
8417       src = SET_SRC (PATTERN (jump));
8418       gcc_assert (GET_CODE (src) == IF_THEN_ELSE);
8419       cond = XEXP (src, 0);
8420       if (XEXP (src, 2) == pc_rtx)
8421 	reverse = 0;
8422       else if (XEXP (src, 1) == pc_rtx)
8423 	reverse = 1;
8424       else
8425 	gcc_unreachable ();
8426       if (reverse != !INSN_FROM_TARGET_P (dlay))
8427 	{
8428 	  machine_mode ccm = GET_MODE (XEXP (cond, 0));
8429 	  enum rtx_code code = reverse_condition (GET_CODE (cond));
8430 	  if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
8431 	    code = reverse_condition_maybe_unordered (GET_CODE (cond));
8432 
8433 	  cond = gen_rtx_fmt_ee (code, GET_MODE (cond),
8434 				 copy_rtx (XEXP (cond, 0)),
8435 				 copy_rtx (XEXP (cond, 1)));
8436 	}
8437       else
8438 	cond = copy_rtx (cond);
8439       patp = &PATTERN (dlay);
8440       pat = *patp;
8441       pat = conditionalize_nonjump (pat, cond, dlay, true);
8442       validate_change (dlay, patp, pat, 1);
8443       if (!apply_change_group ())
8444 	gcc_unreachable ();
8445     }
8446   return 0;
8447 }
8448 
8449 /* For ARC600: If a write to a core reg >=32 appears in a delay slot
8450    (other than of a forward brcc), it creates a hazard when there is a read
8451    of the same register at the branch target.  We can't know what is at the
8452    branch target of calls, and for branches, we don't really know before the
8453    end of delay slot scheduling, either.  Not only can individual instructions
8454    be hoisted out into a delay slot, a basic block can also be emptied this
8455    way, and branch and/or fall-through targets be redirected.  Hence we don't
8456    want such writes in a delay slot.  */
8457 
8458 /* Return nonzero iff INSN writes to an extension core register.  */
8459 
8460 int
8461 arc_write_ext_corereg (rtx insn)
8462 {
8463   subrtx_iterator::array_type array;
8464   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
8465     {
8466       const_rtx x = *iter;
8467       switch (GET_CODE (x))
8468 	{
8469 	case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
8470 	  break;
8471 	default:
8472 	  /* This is also fine for PRE/POST_MODIFY, because they
8473 	     contain a SET.  */
8474 	  continue;
8475 	}
8476       const_rtx dest = XEXP (x, 0);
8477       if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61)
8478 	return 1;
8479     }
8480   return 0;
8481 }
8482 
8483 /* This is like the hook, but returns NULL when it can't / won't generate
8484    a legitimate address.  */
8485 
8486 static rtx
8487 arc_legitimize_address_0 (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8488 			  machine_mode mode)
8489 {
8490   rtx addr, inner;
8491 
8492   if (flag_pic && SYMBOLIC_CONST (x))
8493     x = arc_legitimize_pic_address (x, 0);
8494   addr = x;
8495   if (GET_CODE (addr) == CONST)
8496     addr = XEXP (addr, 0);
8497   if (GET_CODE (addr) == PLUS
8498       && CONST_INT_P (XEXP (addr, 1))
8499       && ((GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
8500 	   && !SYMBOL_REF_FUNCTION_P (XEXP (addr, 0)))
8501 	  || (REG_P (XEXP (addr, 0))
8502 	      && (INTVAL (XEXP (addr, 1)) & 252))))
8503     {
8504       HOST_WIDE_INT offs, upper;
8505       int size = GET_MODE_SIZE (mode);
8506 
8507       offs = INTVAL (XEXP (addr, 1));
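      /* Illustrative example: for SImode (size 4) and offs = 2000,
	 upper = (2000 + 1024) & -2048 = 2048, so we force reg + 2048
	 into a register and use the remainder, -48, as the offset.  */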
8508       upper = (offs + 256 * size) & ~511 * size;
8509       inner = plus_constant (Pmode, XEXP (addr, 0), upper);
8510 #if 0 /* ??? this produces worse code for EEMBC idctrn01  */
8511       if (GET_CODE (x) == CONST)
8512 	inner = gen_rtx_CONST (Pmode, inner);
8513 #endif
8514       addr = plus_constant (Pmode, force_reg (Pmode, inner), offs - upper);
8515       x = addr;
8516     }
8517   else if (GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_FUNCTION_P (addr))
8518     x = force_reg (Pmode, x);
8519   if (memory_address_p ((machine_mode) mode, x))
8520     return x;
8521   return NULL_RTX;
8522 }
8523 
8524 static rtx
8525 arc_legitimize_address (rtx orig_x, rtx oldx, machine_mode mode)
8526 {
8527   rtx new_x = arc_legitimize_address_0 (orig_x, oldx, mode);
8528 
8529   if (new_x)
8530     return new_x;
8531   return orig_x;
8532 }
8533 
8534 static rtx
8535 arc_delegitimize_address_0 (rtx x)
8536 {
8537   rtx u, gp;
8538 
8539   if (GET_CODE (x) == CONST && GET_CODE (u = XEXP (x, 0)) == UNSPEC)
8540     {
8541       if (XINT (u, 1) == ARC_UNSPEC_GOT)
8542 	return XVECEXP (u, 0, 0);
8543     }
8544   else if (GET_CODE (x) == PLUS
8545 	   && ((REG_P (gp = XEXP (x, 0))
8546 		&& REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
8547 	       || (GET_CODE (gp) == CONST
8548 		   && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
8549 		   && XINT (u, 1) == ARC_UNSPEC_GOT
8550 		   && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
8551 		   && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
8552 	   && GET_CODE (XEXP (x, 1)) == CONST
8553 	   && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
8554 	   && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
8555     return XVECEXP (u, 0, 0);
8556   else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8557 	   && ((REG_P (gp = XEXP (XEXP (x, 0), 1))
8558 		&& REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
8559 	       || (GET_CODE (gp) == CONST
8560 		   && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
8561 		   && XINT (u, 1) == ARC_UNSPEC_GOT
8562 		   && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
8563 		   && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
8564 	   && GET_CODE (XEXP (x, 1)) == CONST
8565 	   && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
8566 	   && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
8567     return gen_rtx_PLUS (GET_MODE (x), XEXP (XEXP (x, 0), 0),
8568 			 XVECEXP (u, 0, 0));
8569   else if (GET_CODE (x) == PLUS
8570 	   && (u = arc_delegitimize_address_0 (XEXP (x, 1))))
8571     return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), u);
8572   return NULL_RTX;
8573 }
8574 
8575 static rtx
8576 arc_delegitimize_address (rtx x)
8577 {
8578   rtx orig_x = x = delegitimize_mem_from_attrs (x);
8579   if (GET_CODE (x) == MEM)
8580     x = XEXP (x, 0);
8581   x = arc_delegitimize_address_0 (x);
8582   if (x)
8583     {
8584       if (MEM_P (orig_x))
8585 	x = replace_equiv_address_nv (orig_x, x);
8586       return x;
8587     }
8588   return orig_x;
8589 }
8590 
8591 /* Return a REG rtx for acc1.  N.B. the gcc-internal representation may
8592    differ from the hardware register number in order to allow the generic
8593    code to correctly split the concatenation of acc1 and acc2.  */
8594 
8595 rtx
8596 gen_acc1 (void)
8597 {
8598   return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56 : 57);
8599 }
8600 
8601 /* Return a REG rtx for acc2.  N.B. the gcc-internal representation may
8602    differ from the hardware register number in order to allow the generic
8603    code to correctly split the concatenation of acc1 and acc2.  */
8604 
8605 rtx
8606 gen_acc2 (void)
8607 {
8608   return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57 : 56);
8609 }
8610 
8611 /* Return a REG rtx for mlo.  N.B. the gcc-internal representation may
8612    differ from the hardware register number in order to allow the generic
8613    code to correctly split the concatenation of mhi and mlo.  */
8614 
8615 rtx
8616 gen_mlo (void)
8617 {
8618   return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 59 : 58);
8619 }
8620 
8621 /* Return a REG rtx for mhi.  N.B. the gcc-internal representation may
8622    differ from the hardware register number in order to allow the generic
8623    code to correctly split the concatenation of mhi and mlo.  */
8624 
8625 rtx
8626 gen_mhi (void)
8627 {
8628   return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 58 : 59);
8629 }
8630 
8631 /* FIXME: a parameter should be added, and code added to final.c,
8632    to reproduce this functionality in shorten_branches.  */
8633 #if 0
8634 /* Return nonzero iff BRANCH should be unaligned if possible by upsizing
8635    a previous instruction.  */
8636 int
8637 arc_unalign_branch_p (rtx branch)
8638 {
8639   rtx note;
8640 
8641   if (!TARGET_UNALIGN_BRANCH)
8642     return 0;
8643   /* Do not do this if we have a filled delay slot.  */
8644   if (get_attr_delay_slot_filled (branch) == DELAY_SLOT_FILLED_YES
8645       && !NEXT_INSN (branch)->deleted ())
8646     return 0;
8647   note = find_reg_note (branch, REG_BR_PROB, 0);
8648   return (!note
8649 	  || (arc_unalign_prob_threshold && !br_prob_note_reliable_p (note))
8650 	  || INTVAL (XEXP (note, 0)) < arc_unalign_prob_threshold);
8651 }
8652 #endif
8653 
8654 /* When estimating sizes during arc_reorg, when optimizing for speed, there
8655    are three reasons why we need to consider branches to be length 6:
8656    - annul-false delay slot insns are implemented using conditional execution,
8657      thus preventing short insn formation where used.
8658    - for ARC600: annul-true delay slot insns are implemented where possible
8659      using conditional execution, preventing short insn formation where used.
8660    - for ARC700: likely or somewhat likely taken branches are made long and
8661      unaligned if possible to avoid branch penalty.  */
8662 
8663 bool
8664 arc_branch_size_unknown_p (void)
8665 {
8666   return !optimize_size && arc_reorg_in_progress;
8667 }
8668 
8669 /* We are about to output a return insn.  Add padding if necessary to avoid
8670    a mispredict.  A return could happen immediately after the function
8671    start, but after a call we know that there will be at least a blink
8672    restore.  */
8673 
8674 void
8675 arc_pad_return (void)
8676 {
8677   rtx_insn *insn = current_output_insn;
8678   rtx_insn *prev = prev_active_insn (insn);
8679   int want_long;
8680 
8681   if (!prev)
8682     {
8683       fputs ("\tnop_s\n", asm_out_file);
8684       cfun->machine->unalign ^= 2;
8685       want_long = 1;
8686     }
8687   /* If PREV is a sequence, we know it must be a branch / jump or a tailcall,
8688      because after a call, we'd have to restore blink first.  */
8689   else if (GET_CODE (PATTERN (prev)) == SEQUENCE)
8690     return;
8691   else
8692     {
8693       want_long = (get_attr_length (prev) == 2);
8694       prev = prev_active_insn (prev);
8695     }
8696   if (!prev
8697       || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
8698 	  ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
8699 		       NON_SIBCALL)
8700 	  : CALL_ATTR (prev, NON_SIBCALL)))
8701     {
8702       if (want_long)
8703 	cfun->machine->size_reason
8704 	  = "call/return and return/return must be 6 bytes apart to avoid mispredict";
8705       else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign)
8706 	{
8707 	  cfun->machine->size_reason
8708 	    = "Long unaligned jump avoids non-delay slot penalty";
8709 	  want_long = 1;
8710 	}
8711       /* Disgorge the delay insn, if there is one and it may be moved.  */
8712       if (final_sequence
8713 	  /* ??? Annulled would be OK if we can and do conditionalize
8714 	     the delay slot insn accordingly.  */
8715 	  && !INSN_ANNULLED_BRANCH_P (insn)
8716 	  && (get_attr_cond (insn) != COND_USE
8717 	      || !reg_set_p (gen_rtx_REG (CCmode, CC_REG),
8718 			     XVECEXP (final_sequence, 0, 1))))
8719 	{
8720 	  prev = as_a <rtx_insn *> (XVECEXP (final_sequence, 0, 1));
8721 	  gcc_assert (!prev_real_insn (insn)
8722 		      || !arc_hazard (prev_real_insn (insn), prev));
8723 	  cfun->machine->force_short_suffix = !want_long;
8724 	  rtx save_pred = current_insn_predicate;
8725 	  final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
8726 	  cfun->machine->force_short_suffix = -1;
8727 	  prev->set_deleted ();
8728 	  current_output_insn = insn;
8729 	  current_insn_predicate = save_pred;
8730 	}
8731       else if (want_long)
8732 	fputs ("\tnop\n", asm_out_file);
8733       else
8734 	{
8735 	  fputs ("\tnop_s\n", asm_out_file);
8736 	  cfun->machine->unalign ^= 2;
8737 	}
8738     }
8739   return;
8740 }
8741 
8742 /* The usual; we set up our machine_function data.  */
8743 
8744 static struct machine_function *
8745 arc_init_machine_status (void)
8746 {
8747   struct machine_function *machine;
8748   machine = ggc_cleared_alloc<machine_function> ();
8749   machine->fn_type = ARC_FUNCTION_UNKNOWN;
8750   machine->force_short_suffix = -1;
8751 
8752   return machine;
8753 }
8754 
8755 /* Implements INIT_EXPANDERS.  We just set up to call the above
8756    function.  */
8757 
8758 void
8759 arc_init_expanders (void)
8760 {
8761   init_machine_status = arc_init_machine_status;
8762 }
8763 
8764 /* Check if OP is a proper parallel of a millicode call pattern.  OFFSET
8765    indicates a number of elements to ignore - that allows us to have a
8766    sibcall pattern that starts with (return).  LOAD_P is zero for store
8767    multiples (for prologues), one for load multiples (for epilogues),
8768    and two for load multiples where no final clobber of blink is required.
8769    We also skip the first load / store element since this is supposed to
8770    be checked in the instruction pattern.  */
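/* E.g. a store multiple (LOAD_P == 0) for three saves might look like
   this (an illustrative sketch; the first store is the element checked
   in the instruction pattern):

     (parallel [(set (mem (reg sp)) (reg 13))
		(set (mem (plus (reg sp) (const_int 4))) (reg 14))
		(set (mem (plus (reg sp) (const_int 8))) (reg 15))
		(clobber (reg:SI 31))])  */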
8771 
8772 int
8773 arc_check_millicode (rtx op, int offset, int load_p)
8774 {
8775   int len = XVECLEN (op, 0) - offset;
8776   int i;
8777 
8778   if (load_p == 2)
8779     {
8780       if (len < 2 || len > 13)
8781 	return 0;
8782       load_p = 1;
8783     }
8784   else
8785     {
8786       rtx elt = XVECEXP (op, 0, --len);
8787 
8788       if (GET_CODE (elt) != CLOBBER
8789 	  || !REG_P (XEXP (elt, 0))
8790 	  || REGNO (XEXP (elt, 0)) != RETURN_ADDR_REGNUM
8791 	  || len < 3 || len > 13)
8792 	return 0;
8793     }
8794   for (i = 1; i < len; i++)
8795     {
8796       rtx elt = XVECEXP (op, 0, i + offset);
8797       rtx reg, mem, addr;
8798 
8799       if (GET_CODE (elt) != SET)
8800 	return 0;
8801       mem = XEXP (elt, load_p);
8802       reg = XEXP (elt, 1-load_p);
8803       if (!REG_P (reg) || REGNO (reg) != 13U+i || !MEM_P (mem))
8804 	return 0;
8805       addr = XEXP (mem, 0);
8806       if (GET_CODE (addr) != PLUS
8807 	  || !rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0))
8808 	  || !CONST_INT_P (XEXP (addr, 1)) || INTVAL (XEXP (addr, 1)) != i*4)
8809 	return 0;
8810     }
8811   return 1;
8812 }
8813 
8814 /* Accessor functions for cfun->machine->unalign.  */
8815 
8816 int
8817 arc_get_unalign (void)
8818 {
8819   return cfun->machine->unalign;
8820 }
8821 
8822 void
8823 arc_clear_unalign (void)
8824 {
8825   if (cfun)
8826     cfun->machine->unalign = 0;
8827 }
8828 
8829 void
8830 arc_toggle_unalign (void)
8831 {
8832   cfun->machine->unalign ^= 2;
8833 }
8834 
8835 /* Operands 0..2 are the operands of an addsi which uses a 12 bit
8836    constant in operand 2, but which would require a LIMM because of
8837    operand mismatch.
8838    Operands 3 and 4 are new SET_SRCs for operand 0.  */
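/* E.g. (illustrative) for r0 = r1 + 192 with r0 an ARCompact16 register,
   this prefers the two short insns "mov_s r0,192; add_s r0,r0,r1" over a
   single add with a long immediate.  */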
8839 
8840 void
8841 split_addsi (rtx *operands)
8842 {
8843   int val = INTVAL (operands[2]);
8844 
8845   /* Try for two short insns first.  Lengths being equal, we prefer
8846      expansions with shorter register lifetimes.  */
8847   if (val > 127 && val <= 255
8848       && satisfies_constraint_Rcq (operands[0]))
8849     {
8850       operands[3] = operands[2];
8851       operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
8852     }
8853   else
8854     {
8855       operands[3] = operands[1];
8856       operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[2]);
8857     }
8858 }
8859 
8860 /* Operands 0..2 are the operands of a subsi which uses a 12 bit
8861    constant in operand 1, but which would require a LIMM because of
8862    operand mismatch.
8863    Operands 3 and 4 are new SET_SRCs for operand 0.  */
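/* E.g. (illustrative) for r0 = 5 - r2 with r0 and r2 ARCompact16
   registers, this yields "neg_s r0,r2; add_s r0,r0,5" instead of a
   single sub with a long immediate.  */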
8864 
8865 void
8866 split_subsi (rtx *operands)
8867 {
8868   int val = INTVAL (operands[1]);
8869 
8870   /* Try for two short insns first.  Lengths being equal, we prefer
8871      expansions with shorter register lifetimes.  */
8872   if (satisfies_constraint_Rcq (operands[0])
8873       && satisfies_constraint_Rcq (operands[2]))
8874     {
8875       if (val >= -31 && val <= 127)
8876 	{
8877 	  operands[3] = gen_rtx_NEG (SImode, operands[2]);
8878 	  operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
8879 	  return;
8880 	}
8881       else if (val >= 0 && val < 255)
8882 	{
8883 	  operands[3] = operands[1];
8884 	  operands[4] = gen_rtx_MINUS (SImode, operands[0], operands[2]);
8885 	  return;
8886 	}
8887     }
8888   /* If the destination is not an ARCompact16 register, we might
8889      still have a chance to make a short insn if the source is;
8890      we need to start with a reg-reg move for this.  */
8891   operands[3] = operands[2];
8892   operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[0]);
8893 }
8894 
8895 /* Handle DOUBLE_REGS uses.
8896    Operand 0: destination register
8897    Operand 1: source register  */
8898 
8899 static rtx
8900 arc_process_double_reg_moves (rtx *operands)
8901 {
8902   rtx dest = operands[0];
8903   rtx src  = operands[1];
8904   rtx val;
8905 
8906   enum usesDxState { none, srcDx, destDx, maxDx };
8907   enum usesDxState state = none;
8908 
8909   if (refers_to_regno_p (40, 44, src, 0))
8910     state = srcDx;
8911   if (refers_to_regno_p (40, 44, dest, 0))
8912     {
8913       /* Via arc_register_move_cost, we should never see D,D moves.  */
8914       gcc_assert (state == none);
8915       state = destDx;
8916     }
8917 
8918   if (state == none)
8919     return NULL_RTX;
8920 
8921   start_sequence ();
8922 
8923   if (state == srcDx)
8924     {
8925       /* Without the LR insn, we need to split this into a
8926 	 sequence of insns which will use the DEXCLx and DADDHxy
8927 	 insns to be able to read the Dx register in question.  */
8928       if (TARGET_DPFP_DISABLE_LRSR)
8929 	{
8930 	  /* gen *movdf_insn_nolrsr */
8931 	  rtx set = gen_rtx_SET (VOIDmode, dest, src);
8932 	  rtx use1 = gen_rtx_USE (VOIDmode, const1_rtx);
8933 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, use1)));
8934 	}
8935       else
8936 	{
8937 	  /* When we have 'mov r, D' or 'mov D, D' then get the target
8938 	     register pair for use with the LR insn.  */
8939 	  rtx destHigh = simplify_gen_subreg (SImode, dest, DFmode, 4);
8940 	  rtx destLow  = simplify_gen_subreg (SImode, dest, DFmode, 0);
8941 
8942 	  /* Produce the two LR insns to get the high and low parts.  */
8943 	  emit_insn (gen_rtx_SET (VOIDmode,
8944 				  destHigh,
8945 				  gen_rtx_UNSPEC_VOLATILE (Pmode, gen_rtvec (1, src),
8946 				  VUNSPEC_LR_HIGH)));
8947 	  emit_insn (gen_rtx_SET (VOIDmode,
8948 				  destLow,
8949 				  gen_rtx_UNSPEC_VOLATILE (Pmode, gen_rtvec (1, src),
8950 				  VUNSPEC_LR)));
8951 	}
8952     }
8953   else if (state == destDx)
8954     {
8955       /* When we have 'mov D, r' or 'mov D, D', get the source register
8956 	 pair and use the DEXCL insn to write the Dx register.  */
8957       rtx srcHigh = simplify_gen_subreg (SImode, src, DFmode, 4);
8958       rtx srcLow  = simplify_gen_subreg (SImode, src, DFmode, 0);
8959 
8960       emit_insn (gen_rtx_UNSPEC_VOLATILE (Pmode,
8961 					  gen_rtvec (3, dest, srcHigh, srcLow),
8962 					  VUNSPEC_DEXCL_NORES));
8963 
8964     }
8965   else
8966     gcc_unreachable ();
8967 
8968   val = get_insns ();
8969   end_sequence ();
8970   return val;
8971 }
8972 
8973 /* Operands 0..1 are the operands of a 64 bit move instruction.
8974    Split it into two moves with operands 2/3 and 4/5.  */
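/* E.g. (an illustrative sketch) (set (reg:DI 0) (reg:DI 2)) is split into
   (set (reg:SI 0) (reg:SI 2)) and (set (reg:SI 1) (reg:SI 3)); the two
   halves are emitted in the opposite order when the first destination
   word would overwrite a source word that is still needed.  */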
8975 
8976 rtx
8977 arc_split_move (rtx *operands)
8978 {
8979   machine_mode mode = GET_MODE (operands[0]);
8980   int i;
8981   int swap = 0;
8982   rtx xop[4];
8983   rtx val;
8984 
8985   if (TARGET_DPFP)
8986     {
8987       val = arc_process_double_reg_moves (operands);
8988       if (val)
8989 	return val;
8990     }
8991 
8992   for (i = 0; i < 2; i++)
8993     {
8994       if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
8995 	{
8996 	  rtx addr = XEXP (operands[i], 0);
8997 	  rtx r, o;
8998 	  enum rtx_code code;
8999 
9000 	  gcc_assert (!reg_overlap_mentioned_p (operands[0], addr));
9001 	  switch (GET_CODE (addr))
9002 	    {
9003 	    case PRE_DEC: o = GEN_INT (-8); goto pre_modify;
9004 	    case PRE_INC: o = GEN_INT (8); goto pre_modify;
9005 	    case PRE_MODIFY: o = XEXP (XEXP (addr, 1), 1);
9006 	    pre_modify:
9007 	      code = PRE_MODIFY;
9008 	      break;
9009 	    case POST_DEC: o = GEN_INT (-8); goto post_modify;
9010 	    case POST_INC: o = GEN_INT (8); goto post_modify;
9011 	    case POST_MODIFY: o = XEXP (XEXP (addr, 1), 1);
9012 	    post_modify:
9013 	      code = POST_MODIFY;
9014 	      swap = 2;
9015 	      break;
9016 	    default:
9017 	      gcc_unreachable ();
9018 	    }
9019 	  r = XEXP (addr, 0);
9020 	  xop[0+i] = adjust_automodify_address_nv
9021 		      (operands[i], SImode,
9022 		       gen_rtx_fmt_ee (code, Pmode, r,
9023 				       gen_rtx_PLUS (Pmode, r, o)),
9024 		       0);
9025 	  xop[2+i] = adjust_automodify_address_nv
9026 		      (operands[i], SImode, plus_constant (Pmode, r, 4), 4);
9027 	}
9028       else
9029 	{
9030 	  xop[0+i] = operand_subword (operands[i], 0, 0, mode);
9031 	  xop[2+i] = operand_subword (operands[i], 1, 0, mode);
9032 	}
9033     }
9034   if (reg_overlap_mentioned_p (xop[0], xop[3]))
9035     {
9036       swap = 2;
9037       gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1]));
9038     }
9039   operands[2+swap] = xop[0];
9040   operands[3+swap] = xop[1];
9041   operands[4-swap] = xop[2];
9042   operands[5-swap] = xop[3];
9043 
9044   start_sequence ();
9045   emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[3]));
9046   emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[5]));
9047   val = get_insns ();
9048   end_sequence ();
9049 
9050   return val;
9051 }
9052 
9053 /* Select between the instruction output templates s_tmpl (for short INSNs)
9054    and l_tmpl (for long INSNs).  */
9055 
9056 const char *
9057 arc_short_long (rtx_insn *insn, const char *s_tmpl, const char *l_tmpl)
9058 {
9059   int is_short = arc_verify_short (insn, cfun->machine->unalign, -1);
9060 
9061   extract_constrain_insn_cached (insn);
9062   return is_short ? s_tmpl : l_tmpl;
9063 }
9064 
9065 /* Searches X for any reference to REGNO, returning the rtx of the
9066    reference found if any.  Otherwise, returns NULL_RTX.  */
9067 
9068 rtx
9069 arc_regno_use_in (unsigned int regno, rtx x)
9070 {
9071   const char *fmt;
9072   int i, j;
9073   rtx tem;
9074 
9075   if (REG_P (x) && refers_to_regno_p (regno, x))
9076     return x;
9077 
9078   fmt = GET_RTX_FORMAT (GET_CODE (x));
9079   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9080     {
9081       if (fmt[i] == 'e')
9082 	{
9083 	  if ((tem = regno_use_in (regno, XEXP (x, i))))
9084 	    return tem;
9085 	}
9086       else if (fmt[i] == 'E')
9087 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9088 	  if ((tem = regno_use_in (regno , XVECEXP (x, i, j))))
9089 	    return tem;
9090     }
9091 
9092   return NULL_RTX;
9093 }
9094 
9095 /* Return the integer value of the "type" attribute for INSN, or -1 if
9096    INSN can't have attributes.  */
9097 
9098 int
9099 arc_attr_type (rtx_insn *insn)
9100 {
9101   if (NONJUMP_INSN_P (insn)
9102       ? (GET_CODE (PATTERN (insn)) == USE
9103 	 || GET_CODE (PATTERN (insn)) == CLOBBER)
9104       : JUMP_P (insn)
9105       ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
9106 	 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
9107       : !CALL_P (insn))
9108     return -1;
9109   return get_attr_type (insn);
9110 }
9111 
9112 /* Return true if insn sets the condition codes.  */
9113 
9114 bool
9115 arc_sets_cc_p (rtx_insn *insn)
9116 {
9117   if (NONJUMP_INSN_P (insn))
9118     if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
9119       insn = seq->insn (seq->len () - 1);
9120   return arc_attr_type (insn) == TYPE_COMPARE;
9121 }
9122 
9123 /* Return true if INSN is an instruction with a delay slot we may want
9124    to fill.  */
9125 
9126 bool
9127 arc_need_delay (rtx_insn *insn)
9128 {
9129   rtx_insn *next;
9130 
9131   if (!flag_delayed_branch)
9132     return false;
9133   /* The return at the end of a function needs a delay slot.  */
9134   if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
9135       && (!(next = next_active_insn (insn))
9136 	  || ((!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) != SEQUENCE)
9137 	      && arc_attr_type (next) == TYPE_RETURN))
9138       && (!TARGET_PAD_RETURN
9139 	  || (prev_active_insn (insn)
9140 	      && prev_active_insn (prev_active_insn (insn))
9141 	      && prev_active_insn (prev_active_insn (prev_active_insn (insn))))))
9142     return true;
9143   if (NONJUMP_INSN_P (insn)
9144       ? (GET_CODE (PATTERN (insn)) == USE
9145 	 || GET_CODE (PATTERN (insn)) == CLOBBER
9146 	 || GET_CODE (PATTERN (insn)) == SEQUENCE)
9147       : JUMP_P (insn)
9148       ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
9149 	 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
9150       : !CALL_P (insn))
9151     return false;
9152   return num_delay_slots (insn) != 0;
9153 }
9154 
9155 /* Return true if the scheduling pass(es) has/have already run,
9156    i.e. where possible, we should try to mitigate high latencies
9157    by different instruction selection.  */
9158 
9159 bool
9160 arc_scheduling_not_expected (void)
9161 {
9162   return cfun->machine->arc_reorg_started;
9163 }
9164 
9165 /* Oddly enough, sometimes we get a zero overhead loop that branch
9166    shortening doesn't think is a loop - observed with compile/pr24883.c
9167    -O3 -fomit-frame-pointer -funroll-loops.  Make sure the alignment is
9168    visible to branch shortening (we actually align the loop insn before
9169    it, but that is equivalent since the loop insn is 4 bytes long).  */
9171 
9172 int
9173 arc_label_align (rtx label)
9174 {
9175   int loop_align = LOOP_ALIGN (label);
9176 
9177   if (loop_align > align_labels_log)
9178     {
9179       rtx_insn *prev = prev_nonnote_insn (label);
9180 
9181       if (prev && NONJUMP_INSN_P (prev)
9182 	  && GET_CODE (PATTERN (prev)) == PARALLEL
9183 	  && recog_memoized (prev) == CODE_FOR_doloop_begin_i)
9184 	return loop_align;
9185     }
9186   /* Code has a minimum p2 alignment of 1, which we must restore after an
9187      ADDR_DIFF_VEC.  */
9188   if (align_labels_log < 1)
9189     {
9190       rtx_insn *next = next_nonnote_nondebug_insn (label);
9191       if (INSN_P (next) && recog_memoized (next) >= 0)
9192 	return 1;
9193     }
9194   return align_labels_log;
9195 }
9196 
9197 /* Return true if LABEL is in executable code.  */
9198 
9199 bool
9200 arc_text_label (rtx_insn *label)
9201 {
9202   rtx_insn *next;
9203 
9204   /* ??? We use deleted labels as if they were still there, see
9205      gcc.c-torture/compile/20000326-2.c .  */
9206   gcc_assert (GET_CODE (label) == CODE_LABEL
9207 	      || (GET_CODE (label) == NOTE
9208 		  && NOTE_KIND (label) == NOTE_INSN_DELETED_LABEL));
9209   next = next_nonnote_insn (label);
9210   if (next)
9211     return (!JUMP_TABLE_DATA_P (next)
9212 	    || GET_CODE (PATTERN (next)) != ADDR_VEC);
9213   else if (!PREV_INSN (label))
9214     /* ??? Sometimes text labels get inserted very late, see
9215        gcc.dg/torture/stackalign/comp-goto-1.c */
9216     return true;
9217   return false;
9218 }
9219 
9220 /* Return the size of the pretend args for DECL.  */
9221 
9222 int
9223 arc_decl_pretend_args (tree decl)
9224 {
9225   /* struct function is in DECL_STRUCT_FUNCTION (decl), but no
9226      pretend_args there...  See PR38391.  */
9227   gcc_assert (decl == current_function_decl);
9228   return crtl->args.pretend_args_size;
9229 }
9230 
9231 /* Without this, gcc.dg/tree-prof/bb-reorg.c fails to assemble
9232    when compiling with -O2 -freorder-blocks-and-partition -fprofile-use
9233    -D_PROFILE_USE; delay branch scheduling then follows a crossing jump
9234    to redirect two breqs.  */
9235 
9236 static bool
9237 arc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
9238 {
9239   /* ??? get_attr_type is declared to take an rtx.  */
9240   union { const rtx_insn *c; rtx_insn *r; } u;
9241 
9242   u.c = follower;
9243   if (CROSSING_JUMP_P (followee))
9244     switch (get_attr_type (u.r))
9245       {
9246       case TYPE_BRCC:
9247       case TYPE_BRCC_NO_DELAY_SLOT:
9248 	return false;
9249       default:
9250 	return true;
9251       }
9252   return true;
9253 }
9254 
9255 /* Implement EPILOGUE_USES.
9256    Return true if REGNO should be added to the deemed uses of the epilogue.
9257 
9258    We use the return address
9259    arc_return_address_regs[arc_compute_function_type (cfun)].
9260    But also, we have to make sure all the register restore instructions
9261    are known to be live in interrupt functions.  */
9262 
9263 bool
9264 arc_epilogue_uses (int regno)
9265 {
9266   if (reload_completed)
9267     {
9268       if (ARC_INTERRUPT_P (cfun->machine->fn_type))
9269 	{
9270 	  if (!fixed_regs[regno])
9271 	    return true;
9272 	  return regno == arc_return_address_regs[cfun->machine->fn_type];
9273 	}
9274       else
9275 	return regno == RETURN_ADDR_REGNUM;
9276     }
9277   else
9278     return regno == arc_return_address_regs[arc_compute_function_type (cfun)];
9279 }
9280 
9281 #ifndef TARGET_NO_LRA
9282 #define TARGET_NO_LRA !TARGET_LRA
9283 #endif
9284 
9285 static bool
9286 arc_lra_p (void)
9287 {
9288   return !TARGET_NO_LRA;
9289 }
9290 
9291 /* ??? Should we define TARGET_REGISTER_PRIORITY?  We might prefer to use
9292    Rcq registers, because some insns are shorter with them.  OTOH we already
9293    have separate alternatives for this purpose, and other insns don't
9294    mind, so maybe we should rather prefer the other registers?
9295    We need more data, and we can only get that if we allow people to
9296    try all options.  */
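/* The bit-twiddling below compares equal to R exactly for r0-r3 and
   r12-r15, i.e. (as far as we can tell) the register range that most
   16-bit compact insns can encode.  */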
9297 static int
9298 arc_register_priority (int r)
9299 {
9300   switch (arc_lra_priority_tag)
9301     {
9302     case ARC_LRA_PRIORITY_NONE:
9303       return 0;
9304     case ARC_LRA_PRIORITY_NONCOMPACT:
9305       return ((((r & 7) ^ 4) - 4) & 15) != r;
9306     case ARC_LRA_PRIORITY_COMPACT:
9307       return ((((r & 7) ^ 4) - 4) & 15) == r;
9308     default:
9309       gcc_unreachable ();
9310     }
9311 }
9312 
9313 static reg_class_t
9314 arc_spill_class (reg_class_t /* orig_class */, machine_mode)
9315 {
9316   return GENERAL_REGS;
9317 }
9318 
9319 bool
9320 arc_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9321 			       int itype)
9322 {
9323   rtx x = *p;
9324   enum reload_type type = (enum reload_type) itype;
9325 
9326   if (GET_CODE (x) == PLUS
9327       && CONST_INT_P (XEXP (x, 1))
9328       && (RTX_OK_FOR_BASE_P (XEXP (x, 0), true)
9329 	  || (REG_P (XEXP (x, 0))
9330 	      && reg_equiv_constant (REGNO (XEXP (x, 0))))))
9331     {
9332       int scale = GET_MODE_SIZE (mode);
9333       int shift;
9334       rtx index_rtx = XEXP (x, 1);
9335       HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9336       rtx reg, sum, sum2;
9337 
9338       if (scale > 4)
9339 	scale = 4;
9340       if ((scale-1) & offset)
9341 	scale = 1;
9342       shift = scale >> 1;
9343       offset_base = (offset + (256 << shift)) & (-512 << shift);
9344       /* Sometimes the normal form does not suit DImode.  We
9345 	 could avoid that by using smaller ranges, but that
9346 	 would give less optimized code when SImode is
9347 	 prevalent.  */
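      /* Worked example (illustrative): for SImode, scale = 4 and shift = 2;
	 an offset of 2000 gives offset_base = (2000 + 1024) & -2048 = 2048,
	 so we reload reg + 2048 and keep the remainder, -48, in the
	 address.  */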
9348       if (GET_MODE_SIZE (mode) + offset - offset_base <= (256 << shift))
9349 	{
9350 	  int regno;
9351 
9352 	  reg = XEXP (x, 0);
9353 	  regno = REGNO (reg);
9354 	  sum2 = sum = plus_constant (Pmode, reg, offset_base);
9355 
9356 	  if (reg_equiv_constant (regno))
9357 	    {
9358 	      sum2 = plus_constant (Pmode, reg_equiv_constant (regno),
9359 				    offset_base);
9360 	      if (GET_CODE (sum2) == PLUS)
9361 		sum2 = gen_rtx_CONST (Pmode, sum2);
9362 	    }
9363 	  *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9364 	  push_reload (sum2, NULL_RTX, &XEXP (*p, 0), NULL,
9365 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum,
9366 		       type);
9367 	  return true;
9368 	}
9369     }
9370   /* We must re-recognize what we created before.  */
9371   else if (GET_CODE (x) == PLUS
9372 	   && GET_CODE (XEXP (x, 0)) == PLUS
9373 	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9374 	   && REG_P  (XEXP (XEXP (x, 0), 0))
9375 	   && CONST_INT_P (XEXP (x, 1)))
9376     {
9377       /* Because this address is so complex, we know it must have
9378 	 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9379 	 it is already unshared, and needs no further unsharing.  */
9380       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9381 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9382       return true;
9383     }
9384   return false;
9385 }
9386 
9387 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
9388 
9389 static bool
9390 arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
9391 				    unsigned int align,
9392 				    enum by_pieces_operation op,
9393 				    bool speed_p)
9394 {
9395   /* Let the movmem expander handle small block moves.  */
9396   if (op == MOVE_BY_PIECES)
9397     return false;
9398 
9399   return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
9400 }
9401 
9402 struct gcc_target targetm = TARGET_INITIALIZER;
9403 
9404 #include "gt-arc.h"
9405