1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
5
6 This file is part of GNU CC.
7
8 GNU CC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU CC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "recog.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "except.h"
39 #include "function.h"
40 #include "ggc.h"
41 #include "basic-block.h"
42 #include "toplev.h"
43 #include "sched-int.h"
44 #include "timevar.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "tm_p.h"
48 #include "langhooks.h"
49
50 /* This is used for communication between ASM_OUTPUT_LABEL and
51 ASM_OUTPUT_LABELREF. */
52 int ia64_asm_output_label = 0;
53
54 /* Define the information needed to generate branch and scc insns. This is
55 stored from the compare operation. */
56 struct rtx_def * ia64_compare_op0;
57 struct rtx_def * ia64_compare_op1;
58
59 /* Register names for ia64_expand_prologue. */
60 static const char * const ia64_reg_numbers[96] =
61 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
62 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
63 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
64 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
65 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
66 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
67 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
68 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
69 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
70 "r104","r105","r106","r107","r108","r109","r110","r111",
71 "r112","r113","r114","r115","r116","r117","r118","r119",
72 "r120","r121","r122","r123","r124","r125","r126","r127"};
73
74 /* ??? These strings could be shared with REGISTER_NAMES. */
75 static const char * const ia64_input_reg_names[8] =
76 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
77
78 /* ??? These strings could be shared with REGISTER_NAMES. */
79 static const char * const ia64_local_reg_names[80] =
80 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
81 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
82 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
83 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
84 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
85 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
86 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
87 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
88 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
89 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
90
91 /* ??? These strings could be shared with REGISTER_NAMES. */
92 static const char * const ia64_output_reg_names[8] =
93 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
94
95 /* String used with the -mfixed-range= option. */
96 const char *ia64_fixed_range_string;
97
98 /* Determines whether we use adds, addl, or movl to generate our
99 TLS immediate offsets. */
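/* (Illustrative note, not from the original: the ia64 add immediate forms
   are assumed to be 14 bits for adds, 22 bits for addl, and a full 64 bits
   via movl, which is why -mtls-size is expected to accept 14, 22, or 64;
   the default of 22 below corresponds to the addl form.) */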
100 int ia64_tls_size = 22;
101
102 /* String used with the -mtls-size= option. */
103 const char *ia64_tls_size_string;
104
105 /* Determines whether we run our final scheduling pass or not. We always
106 avoid the normal second scheduling pass. */
107 static int ia64_flag_schedule_insns2;
108
109 /* Variables which are this size or smaller are put in the sdata/sbss
110 sections. */
111
112 unsigned int ia64_section_threshold;
113
114 /* Structure to be filled in by ia64_compute_frame_size with register
115 save masks and offsets for the current function. */
116
117 struct ia64_frame_info
118 {
119 HOST_WIDE_INT total_size; /* size of the stack frame, not including
120 the caller's scratch area. */
121 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
122 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
123 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
124 HARD_REG_SET mask; /* mask of saved registers. */
125 unsigned int gr_used_mask; /* mask of registers in use as gr spill
126 registers or long-term scratches. */
127 int n_spilled; /* number of spilled registers. */
128 int reg_fp; /* register for fp. */
129 int reg_save_b0; /* save register for b0. */
130 int reg_save_pr; /* save register for prs. */
131 int reg_save_ar_pfs; /* save register for ar.pfs. */
132 int reg_save_ar_unat; /* save register for ar.unat. */
133 int reg_save_ar_lc; /* save register for ar.lc. */
134 int reg_save_gp; /* save register for gp. */
135 int n_input_regs; /* number of input registers used. */
136 int n_local_regs; /* number of local registers used. */
137 int n_output_regs; /* number of output registers used. */
138 int n_rotate_regs; /* number of rotating registers used. */
139
140 char need_regstk; /* true if a .regstk directive needed. */
141 char initialized; /* true if the data is finalized. */
142 };
143
144 /* Current frame information calculated by ia64_compute_frame_size. */
145 static struct ia64_frame_info current_frame_info;
146
147 static rtx gen_tls_get_addr PARAMS ((void));
148 static rtx gen_thread_pointer PARAMS ((void));
149 static int find_gr_spill PARAMS ((int));
150 static int next_scratch_gr_reg PARAMS ((void));
151 static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
152 static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
153 static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
154 static void finish_spill_pointers PARAMS ((void));
155 static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
156 static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
157 static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
158 static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
159 static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
160 static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
161
162 static enum machine_mode hfa_element_mode PARAMS ((tree, int));
163 static void fix_range PARAMS ((const char *));
164 static struct machine_function * ia64_init_machine_status PARAMS ((void));
165 static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
166 static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
167 static void emit_predicate_relation_info PARAMS ((void));
168 static bool ia64_in_small_data_p PARAMS ((tree));
169 static void ia64_encode_section_info PARAMS ((tree, int));
170 static const char *ia64_strip_name_encoding PARAMS ((const char *));
171 static void process_epilogue PARAMS ((void));
172 static int process_set PARAMS ((FILE *, rtx));
173
174 static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
175 tree, rtx));
176 static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
177 tree, rtx));
178 static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode,
179 enum machine_mode,
180 int, tree, rtx));
181 static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
182 tree, rtx));
183 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
184 static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
185 static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
186 static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
187 static void ia64_output_function_end_prologue PARAMS ((FILE *));
188
189 static int ia64_issue_rate PARAMS ((void));
190 static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
191 static void ia64_sched_init PARAMS ((FILE *, int, int));
192 static void ia64_sched_finish PARAMS ((FILE *, int));
193 static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
194 int *, int, int));
195 static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
196 static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
197 static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
198
199 static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
200 HOST_WIDE_INT, tree));
201
202 static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
203 unsigned HOST_WIDE_INT));
204 static void ia64_rwreloc_select_section PARAMS ((tree, int,
205 unsigned HOST_WIDE_INT))
206 ATTRIBUTE_UNUSED;
207 static void ia64_rwreloc_unique_section PARAMS ((tree, int))
208 ATTRIBUTE_UNUSED;
209 static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
210 unsigned HOST_WIDE_INT))
211 ATTRIBUTE_UNUSED;
212 static unsigned int ia64_rwreloc_section_type_flags
213 PARAMS ((tree, const char *, int))
214 ATTRIBUTE_UNUSED;
215
216 static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
217 ATTRIBUTE_UNUSED;
218
219 /* Table of valid machine attributes. */
220 static const struct attribute_spec ia64_attribute_table[] =
221 {
222 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
223 { "syscall_linkage", 0, 0, false, true, true, NULL },
224 { NULL, 0, 0, false, false, false, NULL }
225 };
226
227 /* Initialize the GCC target structure. */
228 #undef TARGET_ATTRIBUTE_TABLE
229 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
230
231 #undef TARGET_INIT_BUILTINS
232 #define TARGET_INIT_BUILTINS ia64_init_builtins
233
234 #undef TARGET_EXPAND_BUILTIN
235 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
236
237 #undef TARGET_ASM_BYTE_OP
238 #define TARGET_ASM_BYTE_OP "\tdata1\t"
239 #undef TARGET_ASM_ALIGNED_HI_OP
240 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
241 #undef TARGET_ASM_ALIGNED_SI_OP
242 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
243 #undef TARGET_ASM_ALIGNED_DI_OP
244 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
245 #undef TARGET_ASM_UNALIGNED_HI_OP
246 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
247 #undef TARGET_ASM_UNALIGNED_SI_OP
248 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
249 #undef TARGET_ASM_UNALIGNED_DI_OP
250 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
251 #undef TARGET_ASM_INTEGER
252 #define TARGET_ASM_INTEGER ia64_assemble_integer
253
254 #undef TARGET_ASM_FUNCTION_PROLOGUE
255 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
256 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
257 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
258 #undef TARGET_ASM_FUNCTION_EPILOGUE
259 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
260
261 #undef TARGET_IN_SMALL_DATA_P
262 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
263 #undef TARGET_ENCODE_SECTION_INFO
264 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
265 #undef TARGET_STRIP_NAME_ENCODING
266 #define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding
267
268 #undef TARGET_SCHED_ADJUST_COST
269 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
270 #undef TARGET_SCHED_ISSUE_RATE
271 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
272 #undef TARGET_SCHED_VARIABLE_ISSUE
273 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
274 #undef TARGET_SCHED_INIT
275 #define TARGET_SCHED_INIT ia64_sched_init
276 #undef TARGET_SCHED_FINISH
277 #define TARGET_SCHED_FINISH ia64_sched_finish
278 #undef TARGET_SCHED_REORDER
279 #define TARGET_SCHED_REORDER ia64_sched_reorder
280 #undef TARGET_SCHED_REORDER2
281 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
282
283 #undef TARGET_ASM_OUTPUT_MI_THUNK
284 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
285 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
286 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
287
288 struct gcc_target targetm = TARGET_INITIALIZER;
289
290 /* Return 1 if OP is a valid operand for the MEM of a CALL insn. */
291
292 int
293 call_operand (op, mode)
294 rtx op;
295 enum machine_mode mode;
296 {
297 if (mode != GET_MODE (op) && mode != VOIDmode)
298 return 0;
299
300 return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
301 || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
302 }
303
304 /* Return 1 if OP refers to a symbol in the sdata section. */
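/* (Illustrative note: the string check below relies on the symbol-name
   encoding applied elsewhere in this file by ia64_encode_section_info,
   which is assumed to prefix small-data symbol names with
   ENCODE_SECTION_INFO_CHAR followed by 's'.) */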
305
306 int
307 sdata_symbolic_operand (op, mode)
308 rtx op;
309 enum machine_mode mode ATTRIBUTE_UNUSED;
310 {
311 switch (GET_CODE (op))
312 {
313 case CONST:
314 if (GET_CODE (XEXP (op, 0)) != PLUS
315 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
316 break;
317 op = XEXP (XEXP (op, 0), 0);
318 /* FALLTHRU */
319
320 case SYMBOL_REF:
321 if (CONSTANT_POOL_ADDRESS_P (op))
322 return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
323 else
324 {
325 const char *str = XSTR (op, 0);
326 return (str[0] == ENCODE_SECTION_INFO_CHAR && str[1] == 's');
327 }
328
329 default:
330 break;
331 }
332
333 return 0;
334 }
335
336 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load. */
337
338 int
339 got_symbolic_operand (op, mode)
340 rtx op;
341 enum machine_mode mode ATTRIBUTE_UNUSED;
342 {
343 switch (GET_CODE (op))
344 {
345 case CONST:
346 op = XEXP (op, 0);
347 if (GET_CODE (op) != PLUS)
348 return 0;
349 if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
350 return 0;
351 op = XEXP (op, 1);
352 if (GET_CODE (op) != CONST_INT)
353 return 0;
354
355 return 1;
356
357 /* Ok if we're not using GOT entries at all. */
358 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
359 return 1;
360
361 /* "Ok" while emitting rtl, since otherwise we won't be provided
362 with the entire offset during emission, which makes it very
363 hard to split the offset into high and low parts. */
364 if (rtx_equal_function_value_matters)
365 return 1;
366
367 /* Force the low 14 bits of the constant to zero so that we do not
368 use up so many GOT entries. */
369 return (INTVAL (op) & 0x3fff) == 0;
370
371 case SYMBOL_REF:
372 case LABEL_REF:
373 return 1;
374
375 default:
376 break;
377 }
378 return 0;
379 }
380
381 /* Return 1 if OP refers to a symbol. */
382
383 int
384 symbolic_operand (op, mode)
385 rtx op;
386 enum machine_mode mode ATTRIBUTE_UNUSED;
387 {
388 switch (GET_CODE (op))
389 {
390 case CONST:
391 case SYMBOL_REF:
392 case LABEL_REF:
393 return 1;
394
395 default:
396 break;
397 }
398 return 0;
399 }
400
401 /* Return tls_model if OP refers to a TLS symbol. */
402
403 int
404 tls_symbolic_operand (op, mode)
405 rtx op;
406 enum machine_mode mode ATTRIBUTE_UNUSED;
407 {
408 const char *str;
409
410 if (GET_CODE (op) != SYMBOL_REF)
411 return 0;
412 str = XSTR (op, 0);
413 if (str[0] != ENCODE_SECTION_INFO_CHAR)
414 return 0;
415 switch (str[1])
416 {
417 case 'G':
418 return TLS_MODEL_GLOBAL_DYNAMIC;
419 case 'L':
420 return TLS_MODEL_LOCAL_DYNAMIC;
421 case 'i':
422 return TLS_MODEL_INITIAL_EXEC;
423 case 'l':
424 return TLS_MODEL_LOCAL_EXEC;
425 }
426 return 0;
427 }
428
429
430 /* Return 1 if OP refers to a function. */
431
432 int
433 function_operand (op, mode)
434 rtx op;
435 enum machine_mode mode ATTRIBUTE_UNUSED;
436 {
437 if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
438 return 1;
439 else
440 return 0;
441 }
442
443 /* Return 1 if OP is setjmp or a similar function. */
444
445 /* ??? This is an unsatisfying solution. Should rethink. */
446
447 int
448 setjmp_operand (op, mode)
449 rtx op;
450 enum machine_mode mode ATTRIBUTE_UNUSED;
451 {
452 const char *name;
453 int retval = 0;
454
455 if (GET_CODE (op) != SYMBOL_REF)
456 return 0;
457
458 name = XSTR (op, 0);
459
460 /* The following code is borrowed from special_function_p in calls.c. */
461
462 /* Disregard prefix _, __ or __x. */
463 if (name[0] == '_')
464 {
465 if (name[1] == '_' && name[2] == 'x')
466 name += 3;
467 else if (name[1] == '_')
468 name += 2;
469 else
470 name += 1;
471 }
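/* (Illustration: a name such as "__xsetjmp" is reduced to "setjmp" by the
   prefix stripping above, and "_setjmp" likewise becomes "setjmp".) */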
472
473 if (name[0] == 's')
474 {
475 retval
476 = ((name[1] == 'e'
477 && (! strcmp (name, "setjmp")
478 || ! strcmp (name, "setjmp_syscall")))
479 || (name[1] == 'i'
480 && ! strcmp (name, "sigsetjmp"))
481 || (name[1] == 'a'
482 && ! strcmp (name, "savectx")));
483 }
484 else if ((name[0] == 'q' && name[1] == 's'
485 && ! strcmp (name, "qsetjmp"))
486 || (name[0] == 'v' && name[1] == 'f'
487 && ! strcmp (name, "vfork")))
488 retval = 1;
489
490 return retval;
491 }
492
493 /* Return 1 if OP is a general operand, but exclude symbolic operands
494 when generating PIC code. */
495
496 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
497 from PREDICATE_CODES. */
498
499 int
500 move_operand (op, mode)
501 rtx op;
502 enum machine_mode mode;
503 {
504 if (! TARGET_NO_PIC && symbolic_operand (op, mode))
505 return 0;
506
507 return general_operand (op, mode);
508 }
509
510 /* Return 1 if OP is a register operand that is (or could be) a GR reg. */
511
512 int
513 gr_register_operand (op, mode)
514 rtx op;
515 enum machine_mode mode;
516 {
517 if (! register_operand (op, mode))
518 return 0;
519 if (GET_CODE (op) == SUBREG)
520 op = SUBREG_REG (op);
521 if (GET_CODE (op) == REG)
522 {
523 unsigned int regno = REGNO (op);
524 if (regno < FIRST_PSEUDO_REGISTER)
525 return GENERAL_REGNO_P (regno);
526 }
527 return 1;
528 }
529
530 /* Return 1 if OP is a register operand that is (or could be) an FR reg. */
531
532 int
533 fr_register_operand (op, mode)
534 rtx op;
535 enum machine_mode mode;
536 {
537 if (! register_operand (op, mode))
538 return 0;
539 if (GET_CODE (op) == SUBREG)
540 op = SUBREG_REG (op);
541 if (GET_CODE (op) == REG)
542 {
543 unsigned int regno = REGNO (op);
544 if (regno < FIRST_PSEUDO_REGISTER)
545 return FR_REGNO_P (regno);
546 }
547 return 1;
548 }
549
550 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg. */
551
552 int
553 grfr_register_operand (op, mode)
554 rtx op;
555 enum machine_mode mode;
556 {
557 if (! register_operand (op, mode))
558 return 0;
559 if (GET_CODE (op) == SUBREG)
560 op = SUBREG_REG (op);
561 if (GET_CODE (op) == REG)
562 {
563 unsigned int regno = REGNO (op);
564 if (regno < FIRST_PSEUDO_REGISTER)
565 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
566 }
567 return 1;
568 }
569
570 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg. */
571
572 int
573 gr_nonimmediate_operand (op, mode)
574 rtx op;
575 enum machine_mode mode;
576 {
577 if (! nonimmediate_operand (op, mode))
578 return 0;
579 if (GET_CODE (op) == SUBREG)
580 op = SUBREG_REG (op);
581 if (GET_CODE (op) == REG)
582 {
583 unsigned int regno = REGNO (op);
584 if (regno < FIRST_PSEUDO_REGISTER)
585 return GENERAL_REGNO_P (regno);
586 }
587 return 1;
588 }
589
590 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
591
592 int
593 fr_nonimmediate_operand (op, mode)
594 rtx op;
595 enum machine_mode mode;
596 {
597 if (! nonimmediate_operand (op, mode))
598 return 0;
599 if (GET_CODE (op) == SUBREG)
600 op = SUBREG_REG (op);
601 if (GET_CODE (op) == REG)
602 {
603 unsigned int regno = REGNO (op);
604 if (regno < FIRST_PSEUDO_REGISTER)
605 return FR_REGNO_P (regno);
606 }
607 return 1;
608 }
609
610 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
611
612 int
613 grfr_nonimmediate_operand (op, mode)
614 rtx op;
615 enum machine_mode mode;
616 {
617 if (! nonimmediate_operand (op, mode))
618 return 0;
619 if (GET_CODE (op) == SUBREG)
620 op = SUBREG_REG (op);
621 if (GET_CODE (op) == REG)
622 {
623 unsigned int regno = REGNO (op);
624 if (regno < FIRST_PSEUDO_REGISTER)
625 return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
626 }
627 return 1;
628 }
629
630 /* Return 1 if OP is a GR register operand, or zero. */
631
632 int
633 gr_reg_or_0_operand (op, mode)
634 rtx op;
635 enum machine_mode mode;
636 {
637 return (op == const0_rtx || gr_register_operand (op, mode));
638 }
639
640 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand. */
641
642 int
643 gr_reg_or_5bit_operand (op, mode)
644 rtx op;
645 enum machine_mode mode;
646 {
647 return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
648 || GET_CODE (op) == CONSTANT_P_RTX
649 || gr_register_operand (op, mode));
650 }
651
652 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand. */
653
654 int
655 gr_reg_or_6bit_operand (op, mode)
656 rtx op;
657 enum machine_mode mode;
658 {
659 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
660 || GET_CODE (op) == CONSTANT_P_RTX
661 || gr_register_operand (op, mode));
662 }
663
664 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand. */
665
666 int
667 gr_reg_or_8bit_operand (op, mode)
668 rtx op;
669 enum machine_mode mode;
670 {
671 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
672 || GET_CODE (op) == CONSTANT_P_RTX
673 || gr_register_operand (op, mode));
674 }
675
676 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate. */
677
678 int
679 grfr_reg_or_8bit_operand (op, mode)
680 rtx op;
681 enum machine_mode mode;
682 {
683 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
684 || GET_CODE (op) == CONSTANT_P_RTX
685 || grfr_register_operand (op, mode));
686 }
687
688 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
689 operand. */
690
691 int
692 gr_reg_or_8bit_adjusted_operand (op, mode)
693 rtx op;
694 enum machine_mode mode;
695 {
696 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
697 || GET_CODE (op) == CONSTANT_P_RTX
698 || gr_register_operand (op, mode));
699 }
700
701 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
702 immediate and an 8 bit adjusted immediate operand. This is necessary
703 because when we emit a compare, we don't know what the condition will be,
704 so the immediate must be acceptable to both the GT and LT forms. */
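/* (Worked illustration, not from the original: for integers x < C is the
   same as x <= C - 1, so a constant captured at compare time may later be
   emitted either unchanged or adjusted by one, depending on which condition
   the compare is finally expanded with; hence the constant has to pass both
   the K and L checks below.) */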
705
706 int
707 gr_reg_or_8bit_and_adjusted_operand (op, mode)
708 rtx op;
709 enum machine_mode mode;
710 {
711 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
712 && CONST_OK_FOR_L (INTVAL (op)))
713 || GET_CODE (op) == CONSTANT_P_RTX
714 || gr_register_operand (op, mode));
715 }
716
717 /* Return 1 if OP is a register operand, or a 14 bit immediate operand. */
718
719 int
720 gr_reg_or_14bit_operand (op, mode)
721 rtx op;
722 enum machine_mode mode;
723 {
724 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
725 || GET_CODE (op) == CONSTANT_P_RTX
726 || gr_register_operand (op, mode));
727 }
728
729 /* Return 1 if OP is a register operand, or a 22 bit immediate operand. */
730
731 int
732 gr_reg_or_22bit_operand (op, mode)
733 rtx op;
734 enum machine_mode mode;
735 {
736 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
737 || GET_CODE (op) == CONSTANT_P_RTX
738 || gr_register_operand (op, mode));
739 }
740
741 /* Return 1 if OP is a 6 bit immediate operand. */
742
743 int
744 shift_count_operand (op, mode)
745 rtx op;
746 enum machine_mode mode ATTRIBUTE_UNUSED;
747 {
748 return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
749 || GET_CODE (op) == CONSTANT_P_RTX);
750 }
751
752 /* Return 1 if OP is a 5 bit immediate operand. */
753
754 int
755 shift_32bit_count_operand (op, mode)
756 rtx op;
757 enum machine_mode mode ATTRIBUTE_UNUSED;
758 {
759 return ((GET_CODE (op) == CONST_INT
760 && (INTVAL (op) >= 0 && INTVAL (op) < 32))
761 || GET_CODE (op) == CONSTANT_P_RTX);
762 }
763
764 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */
765
766 int
767 shladd_operand (op, mode)
768 rtx op;
769 enum machine_mode mode ATTRIBUTE_UNUSED;
770 {
771 return (GET_CODE (op) == CONST_INT
772 && (INTVAL (op) == 2 || INTVAL (op) == 4
773 || INTVAL (op) == 8 || INTVAL (op) == 16));
774 }
775
776 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. */
777
778 int
779 fetchadd_operand (op, mode)
780 rtx op;
781 enum machine_mode mode ATTRIBUTE_UNUSED;
782 {
783 return (GET_CODE (op) == CONST_INT
784 && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
785 INTVAL (op) == -4 || INTVAL (op) == -1 ||
786 INTVAL (op) == 1 || INTVAL (op) == 4 ||
787 INTVAL (op) == 8 || INTVAL (op) == 16));
788 }
789
790 /* Return 1 if OP is a floating-point constant zero, one, or a register. */
791
792 int
793 fr_reg_or_fp01_operand (op, mode)
794 rtx op;
795 enum machine_mode mode;
796 {
797 return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
798 || fr_register_operand (op, mode));
799 }
800
801 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
802 POST_MODIFY with a REG as displacement. */
803
804 int
805 destination_operand (op, mode)
806 rtx op;
807 enum machine_mode mode;
808 {
809 if (! nonimmediate_operand (op, mode))
810 return 0;
811 if (GET_CODE (op) == MEM
812 && GET_CODE (XEXP (op, 0)) == POST_MODIFY
813 && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
814 return 0;
815 return 1;
816 }
817
818 /* Like memory_operand, but don't allow post-increments. */
819
820 int
821 not_postinc_memory_operand (op, mode)
822 rtx op;
823 enum machine_mode mode;
824 {
825 return (memory_operand (op, mode)
826 && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
827 }
828
829 /* Return 1 if this is a comparison operator, which accepts a normal 8-bit
830 signed immediate operand. */
831
832 int
833 normal_comparison_operator (op, mode)
834 register rtx op;
835 enum machine_mode mode;
836 {
837 enum rtx_code code = GET_CODE (op);
838 return ((mode == VOIDmode || GET_MODE (op) == mode)
839 && (code == EQ || code == NE
840 || code == GT || code == LE || code == GTU || code == LEU));
841 }
842
843 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
844 signed immediate operand. */
845
846 int
847 adjusted_comparison_operator (op, mode)
848 register rtx op;
849 enum machine_mode mode;
850 {
851 enum rtx_code code = GET_CODE (op);
852 return ((mode == VOIDmode || GET_MODE (op) == mode)
853 && (code == LT || code == GE || code == LTU || code == GEU));
854 }
855
856 /* Return 1 if this is a signed inequality operator. */
857
858 int
859 signed_inequality_operator (op, mode)
860 register rtx op;
861 enum machine_mode mode;
862 {
863 enum rtx_code code = GET_CODE (op);
864 return ((mode == VOIDmode || GET_MODE (op) == mode)
865 && (code == GE || code == GT
866 || code == LE || code == LT));
867 }
868
869 /* Return 1 if this operator is valid for predication. */
870
871 int
872 predicate_operator (op, mode)
873 register rtx op;
874 enum machine_mode mode;
875 {
876 enum rtx_code code = GET_CODE (op);
877 return ((GET_MODE (op) == mode || mode == VOIDmode)
878 && (code == EQ || code == NE));
879 }
880
881 /* Return 1 if this operator can be used in a conditional operation. */
882
883 int
884 condop_operator (op, mode)
885 register rtx op;
886 enum machine_mode mode;
887 {
888 enum rtx_code code = GET_CODE (op);
889 return ((GET_MODE (op) == mode || mode == VOIDmode)
890 && (code == PLUS || code == MINUS || code == AND
891 || code == IOR || code == XOR));
892 }
893
894 /* Return 1 if this is the ar.lc register. */
895
896 int
897 ar_lc_reg_operand (op, mode)
898 register rtx op;
899 enum machine_mode mode;
900 {
901 return (GET_MODE (op) == DImode
902 && (mode == DImode || mode == VOIDmode)
903 && GET_CODE (op) == REG
904 && REGNO (op) == AR_LC_REGNUM);
905 }
906
907 /* Return 1 if this is the ar.ccv register. */
908
909 int
910 ar_ccv_reg_operand (op, mode)
911 register rtx op;
912 enum machine_mode mode;
913 {
914 return ((GET_MODE (op) == mode || mode == VOIDmode)
915 && GET_CODE (op) == REG
916 && REGNO (op) == AR_CCV_REGNUM);
917 }
918
919 /* Return 1 if this is the ar.pfs register. */
920
921 int
922 ar_pfs_reg_operand (op, mode)
923 register rtx op;
924 enum machine_mode mode;
925 {
926 return ((GET_MODE (op) == mode || mode == VOIDmode)
927 && GET_CODE (op) == REG
928 && REGNO (op) == AR_PFS_REGNUM);
929 }
930
931 /* Like general_operand, but don't allow (mem (addressof)). */
932
933 int
934 general_tfmode_operand (op, mode)
935 rtx op;
936 enum machine_mode mode;
937 {
938 if (! general_operand (op, mode))
939 return 0;
940 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
941 return 0;
942 return 1;
943 }
944
945 /* Similarly. */
946
947 int
948 destination_tfmode_operand (op, mode)
949 rtx op;
950 enum machine_mode mode;
951 {
952 if (! destination_operand (op, mode))
953 return 0;
954 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
955 return 0;
956 return 1;
957 }
958
959 /* Similarly. */
960
961 int
962 tfreg_or_fp01_operand (op, mode)
963 rtx op;
964 enum machine_mode mode;
965 {
966 if (GET_CODE (op) == SUBREG)
967 return 0;
968 return fr_reg_or_fp01_operand (op, mode);
969 }
970
971 /* Return 1 if OP is valid as a base register in a reg + offset address. */
972
973 int
974 basereg_operand (op, mode)
975 rtx op;
976 enum machine_mode mode;
977 {
978 /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
979 checks from pa.c basereg_operand as well? Seems to be OK without them
980 in test runs. */
981
982 return (register_operand (op, mode) &&
983 REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
984 }
985
986 /* Return 1 if the operands of a move are ok. */
987
988 int
989 ia64_move_ok (dst, src)
990 rtx dst, src;
991 {
992 /* If we're under init_recog_no_volatile, we'll not be able to use
993 memory_operand. So check the code directly and don't worry about
994 the validity of the underlying address, which should have been
995 checked elsewhere anyway. */
996 if (GET_CODE (dst) != MEM)
997 return 1;
998 if (GET_CODE (src) == MEM)
999 return 0;
1000 if (register_operand (src, VOIDmode))
1001 return 1;
1002
1003 /* Otherwise, this must be a constant: either 0, 0.0, or 1.0. */
1004 if (INTEGRAL_MODE_P (GET_MODE (dst)))
1005 return src == const0_rtx;
1006 else
1007 return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
1008 }
1009
1010 /* Return 0 if we are doing C++ code. This optimization fails with
1011 C++ because of GNAT c++/6685. */
1012
1013 int
1014 addp4_optimize_ok (op1, op2)
1015 rtx op1, op2;
1016 {
1017
1018 if (!strcmp (lang_hooks.name, "GNU C++"))
1019 return 0;
1020
1021 return (basereg_operand (op1, GET_MODE(op1)) !=
1022 basereg_operand (op2, GET_MODE(op2)));
1023 }
1024
1025 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
1026 Return the length of the field, or <= 0 on failure. */
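/* (Worked illustration: with ROP = 0x7f0 and RSHIFT = 4, op becomes 0x7f
   after the shift below and exact_log2 (0x7f + 1) = 7, i.e. a 7-bit field;
   a mask with a hole, say 0x6f0, yields op = 0x6f, exact_log2 (0x70) = -1,
   and the routine reports failure.) */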
1027
1028 int
1029 ia64_depz_field_mask (rop, rshift)
1030 rtx rop, rshift;
1031 {
1032 unsigned HOST_WIDE_INT op = INTVAL (rop);
1033 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
1034
1035 /* Get rid of the zero bits we're shifting in. */
1036 op >>= shift;
1037
1038 /* We must now have a solid block of 1's at bit 0. */
1039 return exact_log2 (op + 1);
1040 }
1041
1042 /* Expand a symbolic constant load. */
1043 /* ??? Should generalize this, so that we can also support 32 bit pointers. */
1044
1045 void
1046 ia64_expand_load_address (dest, src, scratch)
1047 rtx dest, src, scratch;
1048 {
1049 rtx temp;
1050
1051 /* The destination could be a MEM during initial rtl generation,
1052 which isn't a valid destination for the PIC load address patterns. */
1053 if (! register_operand (dest, DImode))
1054 if (! scratch || ! register_operand (scratch, DImode))
1055 temp = gen_reg_rtx (DImode);
1056 else
1057 temp = scratch;
1058 else
1059 temp = dest;
1060
1061 if (tls_symbolic_operand (src, Pmode))
1062 abort ();
1063
1064 if (TARGET_AUTO_PIC)
1065 emit_insn (gen_load_gprel64 (temp, src));
1066 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
1067 emit_insn (gen_load_fptr (temp, src));
1068 else if ((GET_MODE (src) == Pmode || GET_MODE (src) == ptr_mode)
1069 && sdata_symbolic_operand (src, VOIDmode))
1070 emit_insn (gen_load_gprel (temp, src));
1071 else if (GET_CODE (src) == CONST
1072 && GET_CODE (XEXP (src, 0)) == PLUS
1073 && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
1074 && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
1075 {
1076 rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
1077 rtx sym = XEXP (XEXP (src, 0), 0);
1078 HOST_WIDE_INT ofs, hi, lo;
1079
1080 /* Split the offset into a sign extended 14-bit low part
1081 and a complementary high part. */
1082 ofs = INTVAL (XEXP (XEXP (src, 0), 1));
1083 lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
1084 hi = ofs - lo;
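/* (Worked illustration: for ofs = 0x12345, lo becomes
   ((0x2345 ^ 0x2000) - 0x2000) = -0x1cbb and hi becomes 0x14000;
   hi + lo re-creates 0x12345, lo fits the signed 14-bit immediate
   range, and hi is a multiple of 0x4000.) */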
1085
1086 if (! scratch)
1087 scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
1088
1089 ia64_expand_load_address (subtarget, plus_constant (sym, hi), scratch);
1090 emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
1091 }
1092 else
1093 {
1094 rtx insn;
1095 if (! scratch)
1096 scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
1097
1098 insn = emit_insn (gen_load_symptr (temp, src, scratch));
1099 #ifdef POINTERS_EXTEND_UNSIGNED
1100 if (GET_MODE (temp) != GET_MODE (src))
1101 src = convert_memory_address (GET_MODE (temp), src);
1102 #endif
1103 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
1104 }
1105
1106 if (temp != dest)
1107 {
1108 if (GET_MODE (dest) != GET_MODE (temp))
1109 temp = convert_to_mode (GET_MODE (dest), temp, 0);
1110 emit_move_insn (dest, temp);
1111 }
1112 }
1113
1114 static GTY(()) rtx gen_tls_tga;
1115 static rtx
1116 gen_tls_get_addr ()
1117 {
1118 if (!gen_tls_tga)
1119 {
1120 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1121 }
1122 return gen_tls_tga;
1123 }
1124
1125 static GTY(()) rtx thread_pointer_rtx;
1126 static rtx
1127 gen_thread_pointer ()
1128 {
1129 if (!thread_pointer_rtx)
1130 {
1131 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1132 RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
1133 }
1134 return thread_pointer_rtx;
1135 }
1136
1137 rtx
1138 ia64_expand_move (op0, op1)
1139 rtx op0, op1;
1140 {
1141 enum machine_mode mode = GET_MODE (op0);
1142
1143 if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1144 op1 = force_reg (mode, op1);
1145
1146 if (mode == Pmode || mode == ptr_mode)
1147 {
1148 enum tls_model tls_kind;
1149 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1150 {
1151 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1152 rtx orig_op0 = op0;
1153
1154 switch (tls_kind)
1155 {
1156 case TLS_MODEL_GLOBAL_DYNAMIC:
1157 start_sequence ();
1158
1159 tga_op1 = gen_reg_rtx (Pmode);
1160 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1161 tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1162 RTX_UNCHANGING_P (tga_op1) = 1;
1163
1164 tga_op2 = gen_reg_rtx (Pmode);
1165 emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
1166 tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
1167 RTX_UNCHANGING_P (tga_op2) = 1;
1168
1169 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1170 LCT_CONST, Pmode, 2, tga_op1,
1171 Pmode, tga_op2, Pmode);
1172
1173 insns = get_insns ();
1174 end_sequence ();
1175
1176 if (GET_MODE (op0) != Pmode)
1177 op0 = tga_ret;
1178 emit_libcall_block (insns, op0, tga_ret, op1);
1179 break;
1180
1181 case TLS_MODEL_LOCAL_DYNAMIC:
1182 /* ??? This isn't the completely proper way to do local-dynamic.
1183 If the call to __tls_get_addr is used only by a single symbol,
1184 then we should (somehow) move the dtprel to the second arg
1185 to avoid the extra add. */
1186 start_sequence ();
1187
1188 tga_op1 = gen_reg_rtx (Pmode);
1189 emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1190 tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1191 RTX_UNCHANGING_P (tga_op1) = 1;
1192
1193 tga_op2 = const0_rtx;
1194
1195 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1196 LCT_CONST, Pmode, 2, tga_op1,
1197 Pmode, tga_op2, Pmode);
1198
1199 insns = get_insns ();
1200 end_sequence ();
1201
1202 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1203 UNSPEC_LD_BASE);
1204 tmp = gen_reg_rtx (Pmode);
1205 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1206
1207 if (!register_operand (op0, Pmode))
1208 op0 = gen_reg_rtx (Pmode);
1209 if (TARGET_TLS64)
1210 {
1211 emit_insn (gen_load_dtprel (op0, op1));
1212 emit_insn (gen_adddi3 (op0, tmp, op0));
1213 }
1214 else
1215 emit_insn (gen_add_dtprel (op0, tmp, op1));
1216 break;
1217
1218 case TLS_MODEL_INITIAL_EXEC:
1219 tmp = gen_reg_rtx (Pmode);
1220 emit_insn (gen_load_ltoff_tprel (tmp, op1));
1221 tmp = gen_rtx_MEM (Pmode, tmp);
1222 RTX_UNCHANGING_P (tmp) = 1;
1223 tmp = force_reg (Pmode, tmp);
1224
1225 if (!register_operand (op0, Pmode))
1226 op0 = gen_reg_rtx (Pmode);
1227 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1228 break;
1229
1230 case TLS_MODEL_LOCAL_EXEC:
1231 if (!register_operand (op0, Pmode))
1232 op0 = gen_reg_rtx (Pmode);
1233 if (TARGET_TLS64)
1234 {
1235 emit_insn (gen_load_tprel (op0, op1));
1236 emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
1237 }
1238 else
1239 emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
1240 break;
1241
1242 default:
1243 abort ();
1244 }
1245
1246 if (orig_op0 == op0)
1247 return NULL_RTX;
1248 if (GET_MODE (orig_op0) == Pmode)
1249 return op0;
1250 return gen_lowpart (GET_MODE (orig_op0), op0);
1251 }
1252 else if (!TARGET_NO_PIC &&
1253 (symbolic_operand (op1, Pmode) ||
1254 symbolic_operand (op1, ptr_mode)))
1255 {
1256 /* Before optimization starts, delay committing to any particular
1257 type of PIC address load. If this function gets deferred, we
1258 may acquire information that changes the value of the
1259 sdata_symbolic_operand predicate.
1260
1261 But don't delay for function pointers. Loading a function address
1262 actually loads the address of the descriptor not the function.
1263 If we represent these as SYMBOL_REFs, then they get cse'd with
1264 calls, and we end up with calls to the descriptor address instead
1265 of calls to the function address. Functions are not candidates
1266 for sdata anyway.
1267
1268 Don't delay for LABEL_REF because the splitter loses REG_LABEL
1269 notes. Don't delay for pool addresses on general principles;
1270 they'll never become non-local behind our back. */
1271
1272 if (rtx_equal_function_value_matters
1273 && GET_CODE (op1) != LABEL_REF
1274 && ! (GET_CODE (op1) == SYMBOL_REF
1275 && (SYMBOL_REF_FLAG (op1)
1276 || CONSTANT_POOL_ADDRESS_P (op1)
1277 || STRING_POOL_ADDRESS_P (op1))))
1278 if (GET_MODE (op1) == DImode)
1279 emit_insn (gen_movdi_symbolic (op0, op1));
1280 else
1281 emit_insn (gen_movsi_symbolic (op0, op1));
1282 else
1283 ia64_expand_load_address (op0, op1, NULL_RTX);
1284 return NULL_RTX;
1285 }
1286 }
1287
1288 return op1;
1289 }
1290
1291 /* Split a post-reload TImode reference into two DImode components. */
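/* (Illustration, assuming the usual register pairing: a (reg:TI) whose
   REGNO is N splits into (reg:DI N) and (reg:DI N+1); a (mem:TI) with a
   POST_INC address is rewritten below as a POST_MODIFY by 16 so that the
   increment still covers the full TImode width.) */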
1292
1293 rtx
1294 ia64_split_timode (out, in, scratch)
1295 rtx out[2];
1296 rtx in, scratch;
1297 {
1298 switch (GET_CODE (in))
1299 {
1300 case REG:
1301 out[0] = gen_rtx_REG (DImode, REGNO (in));
1302 out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
1303 return NULL_RTX;
1304
1305 case MEM:
1306 {
1307 rtx base = XEXP (in, 0);
1308
1309 switch (GET_CODE (base))
1310 {
1311 case REG:
1312 out[0] = adjust_address (in, DImode, 0);
1313 break;
1314 case POST_MODIFY:
1315 base = XEXP (base, 0);
1316 out[0] = adjust_address (in, DImode, 0);
1317 break;
1318
1319 /* Since we're changing the mode, we need to change to POST_MODIFY
1320 as well to preserve the size of the increment. Either that or
1321 do the update in two steps, but we've already got this scratch
1322 register handy so let's use it. */
1323 case POST_INC:
1324 base = XEXP (base, 0);
1325 out[0]
1326 = change_address (in, DImode,
1327 gen_rtx_POST_MODIFY
1328 (Pmode, base, plus_constant (base, 16)));
1329 break;
1330 case POST_DEC:
1331 base = XEXP (base, 0);
1332 out[0]
1333 = change_address (in, DImode,
1334 gen_rtx_POST_MODIFY
1335 (Pmode, base, plus_constant (base, -16)));
1336 break;
1337 default:
1338 abort ();
1339 }
1340
1341 if (scratch == NULL_RTX)
1342 abort ();
1343 out[1] = change_address (in, DImode, scratch);
1344 return gen_adddi3 (scratch, base, GEN_INT (8));
1345 }
1346
1347 case CONST_INT:
1348 case CONST_DOUBLE:
1349 split_double (in, &out[0], &out[1]);
1350 return NULL_RTX;
1351
1352 default:
1353 abort ();
1354 }
1355 }
1356
1357 /* ??? Fixing GR->FR TFmode moves during reload is hard. You need to go
1358 through memory plus an extra GR scratch register. Except that you can
1359 either get the first from SECONDARY_MEMORY_NEEDED or the second from
1360 SECONDARY_RELOAD_CLASS, but not both.
1361
1362 We got into problems in the first place by allowing a construct like
1363 (subreg:TF (reg:TI)), which we got from a union containing a long double.
1364 This solution attempts to prevent this situation from occurring. When
1365 we see something like the above, we spill the inner register to memory. */
1366
1367 rtx
1368 spill_tfmode_operand (in, force)
1369 rtx in;
1370 int force;
1371 {
1372 if (GET_CODE (in) == SUBREG
1373 && GET_MODE (SUBREG_REG (in)) == TImode
1374 && GET_CODE (SUBREG_REG (in)) == REG)
1375 {
1376 rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, true);
1377 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1378 }
1379 else if (force && GET_CODE (in) == REG)
1380 {
1381 rtx mem = gen_mem_addressof (in, NULL_TREE, true);
1382 return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1383 }
1384 else if (GET_CODE (in) == MEM
1385 && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1386 return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1387 else
1388 return in;
1389 }
1390
1391 /* Emit comparison instruction if necessary, returning the expression
1392 that holds the compare result in the proper mode. */
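/* (Illustration: for a DImode GT comparison the code below emits
   p = (gt:BI op0 op1) into a fresh BImode pseudo and hands back
   (ne:MODE p 0); a BImode input is assumed to already hold a predicate
   and is used directly.) */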
1393
1394 rtx
1395 ia64_expand_compare (code, mode)
1396 enum rtx_code code;
1397 enum machine_mode mode;
1398 {
1399 rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1400 rtx cmp;
1401
1402 /* If we have a BImode input, then we already have a compare result, and
1403 do not need to emit another comparison. */
1404 if (GET_MODE (op0) == BImode)
1405 {
1406 if ((code == NE || code == EQ) && op1 == const0_rtx)
1407 cmp = op0;
1408 else
1409 abort ();
1410 }
1411 else
1412 {
1413 cmp = gen_reg_rtx (BImode);
1414 emit_insn (gen_rtx_SET (VOIDmode, cmp,
1415 gen_rtx_fmt_ee (code, BImode, op0, op1)));
1416 code = NE;
1417 }
1418
1419 return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1420 }
1421
1422 /* Emit the appropriate sequence for a call. */
1423 void
1424 ia64_expand_call (retval, addr, nextarg, sibcall_p)
1425 rtx retval;
1426 rtx addr;
1427 rtx nextarg ATTRIBUTE_UNUSED;
1428 int sibcall_p;
1429 {
1430 rtx insn, b0;
1431
1432 addr = XEXP (addr, 0);
1433 addr = convert_memory_address (DImode, addr);
1434 b0 = gen_rtx_REG (DImode, R_BR (0));
1435
1436 /* ??? Should do this for functions known to bind local too. */
1437 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1438 {
1439 if (sibcall_p)
1440 insn = gen_sibcall_nogp (addr);
1441 else if (! retval)
1442 insn = gen_call_nogp (addr, b0);
1443 else
1444 insn = gen_call_value_nogp (retval, addr, b0);
1445 insn = emit_call_insn (insn);
1446 }
1447 else
1448 {
1449 if (sibcall_p)
1450 insn = gen_sibcall_gp (addr);
1451 else if (! retval)
1452 insn = gen_call_gp (addr, b0);
1453 else
1454 insn = gen_call_value_gp (retval, addr, b0);
1455 insn = emit_call_insn (insn);
1456
1457 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1458 }
1459
1460 if (sibcall_p)
1461 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1462 }
1463 void
1464 ia64_reload_gp ()
1465 {
1466 rtx tmp;
1467
1468 if (current_frame_info.reg_save_gp)
1469 tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1470 else
1471 {
1472 HOST_WIDE_INT offset;
1473
1474 offset = (current_frame_info.spill_cfa_off
1475 + current_frame_info.spill_size);
1476 if (frame_pointer_needed)
1477 {
1478 tmp = hard_frame_pointer_rtx;
1479 offset = -offset;
1480 }
1481 else
1482 {
1483 tmp = stack_pointer_rtx;
1484 offset = current_frame_info.total_size - offset;
1485 }
1486
1487 if (CONST_OK_FOR_I (offset))
1488 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1489 tmp, GEN_INT (offset)));
1490 else
1491 {
1492 emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1493 emit_insn (gen_adddi3 (pic_offset_table_rtx,
1494 pic_offset_table_rtx, tmp));
1495 }
1496
1497 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1498 }
1499
1500 emit_move_insn (pic_offset_table_rtx, tmp);
1501 }
1502
1503 void
1504 ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
1505 noreturn_p, sibcall_p)
1506 rtx retval, addr, retaddr, scratch_r, scratch_b;
1507 int noreturn_p, sibcall_p;
1508 {
1509 rtx insn;
1510 bool is_desc = false;
1511
1512 /* If we find we're calling through a register, then we're actually
1513 calling through a descriptor, so load up the values. */
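/* (Background note, hedged: an ia64 function descriptor is assumed to hold
   the entry address in its first doubleword and the callee's gp in the
   second, which is why the code below reads through ADDR with a
   post-increment and then loads the gp word.) */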
1514 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1515 {
1516 rtx tmp;
1517 bool addr_dead_p;
1518
1519 /* ??? We are currently constrained to *not* use peep2, because
1520 we can legitimately change the global lifetime of the GP
1521 (in the form of killing where previously live). This is
1522 because a call through a descriptor doesn't use the previous
1523 value of the GP, while a direct call does, and we do not
1524 commit to either form until the split here.
1525
1526 That said, this means that we lack precise life info for
1527 whether ADDR is dead after this call. This is not terribly
1528 important, since we can fix things up essentially for free
1529 with the POST_DEC below, but it's nice to not use it when we
1530 can immediately tell it's not necessary. */
1531 addr_dead_p = ((noreturn_p || sibcall_p
1532 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1533 REGNO (addr)))
1534 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1535
1536 /* Load the code address into scratch_b. */
1537 tmp = gen_rtx_POST_INC (Pmode, addr);
1538 tmp = gen_rtx_MEM (Pmode, tmp);
1539 emit_move_insn (scratch_r, tmp);
1540 emit_move_insn (scratch_b, scratch_r);
1541
1542 /* Load the GP address. If ADDR is not dead here, then we must
1543 revert the change made above via the POST_INCREMENT. */
1544 if (!addr_dead_p)
1545 tmp = gen_rtx_POST_DEC (Pmode, addr);
1546 else
1547 tmp = addr;
1548 tmp = gen_rtx_MEM (Pmode, tmp);
1549 emit_move_insn (pic_offset_table_rtx, tmp);
1550
1551 is_desc = true;
1552 addr = scratch_b;
1553 }
1554
1555 if (sibcall_p)
1556 insn = gen_sibcall_nogp (addr);
1557 else if (retval)
1558 insn = gen_call_value_nogp (retval, addr, retaddr);
1559 else
1560 insn = gen_call_nogp (addr, retaddr);
1561 emit_call_insn (insn);
1562
1563 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1564 ia64_reload_gp ();
1565 }
1566
1567 /* Begin the assembly file. */
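/* (Illustrative example of the output of emit_safe_across_calls: with the
   usual ia64 partitioning, where p1-p5 and p16-p63 are preserved across
   calls, the loop below would emit
       .pred.safe_across_calls p1-p5,p16-p63
   the exact ranges depend on the call_used_regs configuration.) */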
1568
1569 void
1570 emit_safe_across_calls (f)
1571 FILE *f;
1572 {
1573 unsigned int rs, re;
1574 int out_state;
1575
1576 rs = 1;
1577 out_state = 0;
1578 while (1)
1579 {
1580 while (rs < 64 && call_used_regs[PR_REG (rs)])
1581 rs++;
1582 if (rs >= 64)
1583 break;
1584 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1585 continue;
1586 if (out_state == 0)
1587 {
1588 fputs ("\t.pred.safe_across_calls ", f);
1589 out_state = 1;
1590 }
1591 else
1592 fputc (',', f);
1593 if (re == rs + 1)
1594 fprintf (f, "p%u", rs);
1595 else
1596 fprintf (f, "p%u-p%u", rs, re - 1);
1597 rs = re + 1;
1598 }
1599 if (out_state)
1600 fputc ('\n', f);
1601 }
1602
1603 /* Helper function for ia64_compute_frame_size: find an appropriate general
1604 register to spill some special register to. current_frame_info.gr_used_mask
1605 records the bits in GR0 to GR31 that have already been allocated by this routine.
1606 TRY_LOCALS is true if we should attempt to locate a local regnum. */
1607
1608 static int
1609 find_gr_spill (try_locals)
1610 int try_locals;
1611 {
1612 int regno;
1613
1614 /* If this is a leaf function, first try an otherwise unused
1615 call-clobbered register. */
1616 if (current_function_is_leaf)
1617 {
1618 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1619 if (! regs_ever_live[regno]
1620 && call_used_regs[regno]
1621 && ! fixed_regs[regno]
1622 && ! global_regs[regno]
1623 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1624 {
1625 current_frame_info.gr_used_mask |= 1 << regno;
1626 return regno;
1627 }
1628 }
1629
1630 if (try_locals)
1631 {
1632 regno = current_frame_info.n_local_regs;
1633 /* If there is a frame pointer, then we can't use loc79, because
1634 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
1635 reg_name switching code in ia64_expand_prologue. */
1636 if (regno < (80 - frame_pointer_needed))
1637 {
1638 current_frame_info.n_local_regs = regno + 1;
1639 return LOC_REG (0) + regno;
1640 }
1641 }
1642
1643 /* Failed to find a general register to spill to. Must use stack. */
1644 return 0;
1645 }
1646
1647 /* In order to make for nice schedules, we try to allocate every temporary
1648 to a different register. We must of course stay away from call-saved,
1649 fixed, and global registers. We must also stay away from registers
1650 allocated in current_frame_info.gr_used_mask, since those include regs
1651 used all through the prologue.
1652
1653 Any register allocated here must be used immediately. The idea is to
1654 aid scheduling, not to solve data flow problems. */
1655
1656 static int last_scratch_gr_reg;
1657
1658 static int
1659 next_scratch_gr_reg ()
1660 {
1661 int i, regno;
1662
1663 for (i = 0; i < 32; ++i)
1664 {
1665 regno = (last_scratch_gr_reg + i + 1) & 31;
1666 if (call_used_regs[regno]
1667 && ! fixed_regs[regno]
1668 && ! global_regs[regno]
1669 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1670 {
1671 last_scratch_gr_reg = regno;
1672 return regno;
1673 }
1674 }
1675
1676 /* There must be _something_ available. */
1677 abort ();
1678 }
1679
1680 /* Helper function for ia64_compute_frame_size, called through
1681 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
1682
1683 static void
1684 mark_reg_gr_used_mask (reg, data)
1685 rtx reg;
1686 void *data ATTRIBUTE_UNUSED;
1687 {
1688 unsigned int regno = REGNO (reg);
1689 if (regno < 32)
1690 {
1691 unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1692 for (i = 0; i < n; ++i)
1693 current_frame_info.gr_used_mask |= 1 << (regno + i);
1694 }
1695 }
1696
1697 /* Compute the register save masks, offsets, and frame size for the current
1698 function, filling in current_frame_info. SIZE is the number of bytes of
1699 space needed for local variables. */
1700
1701 static void
1702 ia64_compute_frame_size (size)
1703 HOST_WIDE_INT size;
1704 {
1705 HOST_WIDE_INT total_size;
1706 HOST_WIDE_INT spill_size = 0;
1707 HOST_WIDE_INT extra_spill_size = 0;
1708 HOST_WIDE_INT pretend_args_size;
1709 HARD_REG_SET mask;
1710 int n_spilled = 0;
1711 int spilled_gr_p = 0;
1712 int spilled_fr_p = 0;
1713 unsigned int regno;
1714 int i;
1715
1716 if (current_frame_info.initialized)
1717 return;
1718
1719 memset (&current_frame_info, 0, sizeof current_frame_info);
1720 CLEAR_HARD_REG_SET (mask);
1721
1722 /* Don't allocate scratches to the return register. */
1723 diddle_return_value (mark_reg_gr_used_mask, NULL);
1724
1725 /* Don't allocate scratches to the EH scratch registers. */
1726 if (cfun->machine->ia64_eh_epilogue_sp)
1727 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1728 if (cfun->machine->ia64_eh_epilogue_bsp)
1729 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1730
1731 /* Find the size of the register stack frame. We have only 80 local
1732 registers, because we reserve 8 for the inputs and 8 for the
1733 outputs. */
1734
1735 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1736 since we'll be adjusting that down later. */
1737 regno = LOC_REG (78) + ! frame_pointer_needed;
1738 for (; regno >= LOC_REG (0); regno--)
1739 if (regs_ever_live[regno])
1740 break;
1741 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1742
1743 /* For functions marked with the syscall_linkage attribute, we must mark
1744 all eight input registers as in use, so that locals aren't visible to
1745 the caller. */
1746
1747 if (cfun->machine->n_varargs > 0
1748 || lookup_attribute ("syscall_linkage",
1749 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1750 current_frame_info.n_input_regs = 8;
1751 else
1752 {
1753 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1754 if (regs_ever_live[regno])
1755 break;
1756 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1757 }
1758
1759 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1760 if (regs_ever_live[regno])
1761 break;
1762 i = regno - OUT_REG (0) + 1;
1763
1764 /* When -p profiling, we need one output register for the mcount argument.
1765 Likewise for -a profiling for the bb_init_func argument. For -ax
1766 profiling, we need two output registers for the two bb_init_trace_func
1767 arguments. */
1768 if (current_function_profile)
1769 i = MAX (i, 1);
1770 current_frame_info.n_output_regs = i;
1771
1772 /* ??? No rotating register support yet. */
1773 current_frame_info.n_rotate_regs = 0;
1774
1775 /* Discover which registers need spilling, and how much room that
1776 will take. Begin with floating point and general registers,
1777 which will always wind up on the stack. */
1778
1779 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1780 if (regs_ever_live[regno] && ! call_used_regs[regno])
1781 {
1782 SET_HARD_REG_BIT (mask, regno);
1783 spill_size += 16;
1784 n_spilled += 1;
1785 spilled_fr_p = 1;
1786 }
1787
1788 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1789 if (regs_ever_live[regno] && ! call_used_regs[regno])
1790 {
1791 SET_HARD_REG_BIT (mask, regno);
1792 spill_size += 8;
1793 n_spilled += 1;
1794 spilled_gr_p = 1;
1795 }
1796
1797 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1798 if (regs_ever_live[regno] && ! call_used_regs[regno])
1799 {
1800 SET_HARD_REG_BIT (mask, regno);
1801 spill_size += 8;
1802 n_spilled += 1;
1803 }
1804
1805 /* Now come all special registers that might get saved in other
1806 general registers. */
1807
1808 if (frame_pointer_needed)
1809 {
1810 current_frame_info.reg_fp = find_gr_spill (1);
1811 /* If we did not get a register, then we take LOC79. This is guaranteed
1812 to be free, even if regs_ever_live is already set, because this is
1813 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
1814 as we don't count loc79 above. */
1815 if (current_frame_info.reg_fp == 0)
1816 {
1817 current_frame_info.reg_fp = LOC_REG (79);
1818 current_frame_info.n_local_regs++;
1819 }
1820 }
1821
1822 if (! current_function_is_leaf)
1823 {
1824 /* Emit a save of BR0 if we call other functions. Do this even
1825 if this function doesn't return, as EH depends on this to be
1826 able to unwind the stack. */
1827 SET_HARD_REG_BIT (mask, BR_REG (0));
1828
1829 current_frame_info.reg_save_b0 = find_gr_spill (1);
1830 if (current_frame_info.reg_save_b0 == 0)
1831 {
1832 spill_size += 8;
1833 n_spilled += 1;
1834 }
1835
1836 /* Similarly for ar.pfs. */
1837 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1838 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1839 if (current_frame_info.reg_save_ar_pfs == 0)
1840 {
1841 extra_spill_size += 8;
1842 n_spilled += 1;
1843 }
1844
1845 /* Similarly for gp. Note that if we're calling setjmp, the stacked
1846 registers are clobbered, so we fall back to the stack. */
1847 current_frame_info.reg_save_gp
1848 = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
1849 if (current_frame_info.reg_save_gp == 0)
1850 {
1851 SET_HARD_REG_BIT (mask, GR_REG (1));
1852 spill_size += 8;
1853 n_spilled += 1;
1854 }
1855 }
1856 else
1857 {
1858 if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1859 {
1860 SET_HARD_REG_BIT (mask, BR_REG (0));
1861 spill_size += 8;
1862 n_spilled += 1;
1863 }
1864
1865 if (regs_ever_live[AR_PFS_REGNUM])
1866 {
1867 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1868 current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1869 if (current_frame_info.reg_save_ar_pfs == 0)
1870 {
1871 extra_spill_size += 8;
1872 n_spilled += 1;
1873 }
1874 }
1875 }
1876
1877 /* Unwind descriptor hackery: things are most efficient if we allocate
1878 consecutive GR save registers for RP, PFS, FP in that order. However,
1879 it is absolutely critical that FP get the only hard register that's
1880 guaranteed to be free, so we allocated it first. If all three did
1881 happen to be allocated hard regs, and are consecutive, rearrange them
1882 into the preferred order now. */
1883 if (current_frame_info.reg_fp != 0
1884 && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1885 && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1886 {
1887 current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1888 current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1889 current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1890 }
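/* Illustrative example (hypothetical register numbers): if find_gr_spill
   had returned GR N for FP, N + 1 for B0 and N + 2 for ar.pfs, the block
   above reshuffles them so that B0 is saved in N, ar.pfs in N + 1 and FP
   in N + 2, which is the RP, PFS, FP order that the consecutive-GR unwind
   descriptor encodes most compactly.  Only the relative order matters.  */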
1891
1892 /* See if we need to store the predicate register block. */
1893 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1894 if (regs_ever_live[regno] && ! call_used_regs[regno])
1895 break;
1896 if (regno <= PR_REG (63))
1897 {
1898 SET_HARD_REG_BIT (mask, PR_REG (0));
1899 current_frame_info.reg_save_pr = find_gr_spill (1);
1900 if (current_frame_info.reg_save_pr == 0)
1901 {
1902 extra_spill_size += 8;
1903 n_spilled += 1;
1904 }
1905
1906 /* ??? Mark them all as used so that register renaming and such
1907 are free to use them. */
1908 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1909 regs_ever_live[regno] = 1;
1910 }
1911
1912 /* If we're forced to use st8.spill, we're forced to save and restore
1913 ar.unat as well. The check for existing liveness allows inline asm
1914 to touch ar.unat. */
1915 if (spilled_gr_p || cfun->machine->n_varargs
1916 || regs_ever_live[AR_UNAT_REGNUM])
1917 {
1918 regs_ever_live[AR_UNAT_REGNUM] = 1;
1919 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1920 current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1921 if (current_frame_info.reg_save_ar_unat == 0)
1922 {
1923 extra_spill_size += 8;
1924 n_spilled += 1;
1925 }
1926 }
1927
1928 if (regs_ever_live[AR_LC_REGNUM])
1929 {
1930 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1931 current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1932 if (current_frame_info.reg_save_ar_lc == 0)
1933 {
1934 extra_spill_size += 8;
1935 n_spilled += 1;
1936 }
1937 }
1938
1939 /* If we have an odd number of words of pretend arguments written to
1940 the stack, then the FR save area will be unaligned. We round the
1941 size of this area up to keep things 16 byte aligned. */
1942 if (spilled_fr_p)
1943 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1944 else
1945 pretend_args_size = current_function_pretend_args_size;
1946
1947 total_size = (spill_size + extra_spill_size + size + pretend_args_size
1948 + current_function_outgoing_args_size);
1949 total_size = IA64_STACK_ALIGN (total_size);
1950
1951 /* We always use the 16-byte scratch area provided by the caller, but
1952 if we are a leaf function, there's no one to which we need to provide
1953 a scratch area. */
1954 if (current_function_is_leaf)
1955 total_size = MAX (0, total_size - 16);
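/* Worked example (hypothetical sizes, for illustration only): with
   spill_size = 32, extra_spill_size = 8, size = 40, pretend_args_size = 0
   and current_function_outgoing_args_size = 24, the raw total is 104 and
   IA64_STACK_ALIGN rounds it up to 112.  A leaf function then drops the
   caller-provided 16 byte scratch area, ending up with total_size = 96.  */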
1956
1957 current_frame_info.total_size = total_size;
1958 current_frame_info.spill_cfa_off = pretend_args_size - 16;
1959 current_frame_info.spill_size = spill_size;
1960 current_frame_info.extra_spill_size = extra_spill_size;
1961 COPY_HARD_REG_SET (current_frame_info.mask, mask);
1962 current_frame_info.n_spilled = n_spilled;
1963 current_frame_info.initialized = reload_completed;
1964 }
1965
1966 /* Compute the initial difference between the specified pair of registers. */
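/* For instance (illustrative values only): a non-leaf function with
   total_size = 112, outgoing args size = 24 and no pretend args reports
   FRAME_POINTER -> HARD_FRAME_POINTER as -(112 - 24 - 16) = -72,
   FRAME_POINTER -> STACK_POINTER as 16 + 24 = 40, and
   ARG_POINTER -> STACK_POINTER as 112 + 16 = 128.  */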
1967
1968 HOST_WIDE_INT
1969 ia64_initial_elimination_offset (from, to)
1970 int from, to;
1971 {
1972 HOST_WIDE_INT offset;
1973
1974 ia64_compute_frame_size (get_frame_size ());
1975 switch (from)
1976 {
1977 case FRAME_POINTER_REGNUM:
1978 if (to == HARD_FRAME_POINTER_REGNUM)
1979 {
1980 if (current_function_is_leaf)
1981 offset = -current_frame_info.total_size;
1982 else
1983 offset = -(current_frame_info.total_size
1984 - current_function_outgoing_args_size - 16);
1985 }
1986 else if (to == STACK_POINTER_REGNUM)
1987 {
1988 if (current_function_is_leaf)
1989 offset = 0;
1990 else
1991 offset = 16 + current_function_outgoing_args_size;
1992 }
1993 else
1994 abort ();
1995 break;
1996
1997 case ARG_POINTER_REGNUM:
1998 /* Arguments start above the 16 byte save area, unless stdarg
1999 in which case we store through the 16 byte save area. */
2000 if (to == HARD_FRAME_POINTER_REGNUM)
2001 offset = 16 - current_function_pretend_args_size;
2002 else if (to == STACK_POINTER_REGNUM)
2003 offset = (current_frame_info.total_size
2004 + 16 - current_function_pretend_args_size);
2005 else
2006 abort ();
2007 break;
2008
2009 default:
2010 abort ();
2011 }
2012
2013 return offset;
2014 }
2015
2016 /* If there are more than a trivial number of register spills, we use
2017 two interleaved iterators so that we can get two memory references
2018 per insn group.
2019
2020 In order to simplify things in the prologue and epilogue expanders,
2021 we use helper functions to fix up the memory references after the
2022 fact with the appropriate offsets to a POST_MODIFY memory mode.
2023 The following data structure tracks the state of the two iterators
2024 while insns are being emitted. */
2025
2026 struct spill_fill_data
2027 {
2028 rtx init_after; /* point at which to emit initializations */
2029 rtx init_reg[2]; /* initial base register */
2030 rtx iter_reg[2]; /* the iterator registers */
2031 rtx *prev_addr[2]; /* address of last memory use */
2032 rtx prev_insn[2]; /* the insn corresponding to prev_addr */
2033 HOST_WIDE_INT prev_off[2]; /* last offset */
2034 int n_iter; /* number of iterators in use */
2035 int next_iter; /* next iterator to use */
2036 unsigned int save_gr_used_mask;
2037 };
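/* Sketch of the interleaving (hypothetical offsets): with n_iter == 2,
   successive calls to spill_restore_mem alternate between iter_reg[0] and
   iter_reg[1], so a run of 8 byte GR saves at CFA offsets 24, 16, 8, 0 is
   addressed as [iter0], [iter1], [iter0], [iter1], with each iterator
   register post-incremented by 16 after its use.  The two independent
   address streams are what make two memory references per insn group
   possible.  */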
2038
2039 static struct spill_fill_data spill_fill_data;
2040
2041 static void
2042 setup_spill_pointers (n_spills, init_reg, cfa_off)
2043 int n_spills;
2044 rtx init_reg;
2045 HOST_WIDE_INT cfa_off;
2046 {
2047 int i;
2048
2049 spill_fill_data.init_after = get_last_insn ();
2050 spill_fill_data.init_reg[0] = init_reg;
2051 spill_fill_data.init_reg[1] = init_reg;
2052 spill_fill_data.prev_addr[0] = NULL;
2053 spill_fill_data.prev_addr[1] = NULL;
2054 spill_fill_data.prev_insn[0] = NULL;
2055 spill_fill_data.prev_insn[1] = NULL;
2056 spill_fill_data.prev_off[0] = cfa_off;
2057 spill_fill_data.prev_off[1] = cfa_off;
2058 spill_fill_data.next_iter = 0;
2059 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2060
2061 spill_fill_data.n_iter = 1 + (n_spills > 2);
2062 for (i = 0; i < spill_fill_data.n_iter; ++i)
2063 {
2064 int regno = next_scratch_gr_reg ();
2065 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2066 current_frame_info.gr_used_mask |= 1 << regno;
2067 }
2068 }
2069
2070 static void
2071 finish_spill_pointers ()
2072 {
2073 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2074 }
2075
2076 static rtx
2077 spill_restore_mem (reg, cfa_off)
2078 rtx reg;
2079 HOST_WIDE_INT cfa_off;
2080 {
2081 int iter = spill_fill_data.next_iter;
2082 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2083 rtx disp_rtx = GEN_INT (disp);
2084 rtx mem;
2085
2086 if (spill_fill_data.prev_addr[iter])
2087 {
2088 if (CONST_OK_FOR_N (disp))
2089 {
2090 *spill_fill_data.prev_addr[iter]
2091 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2092 gen_rtx_PLUS (DImode,
2093 spill_fill_data.iter_reg[iter],
2094 disp_rtx));
2095 REG_NOTES (spill_fill_data.prev_insn[iter])
2096 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2097 REG_NOTES (spill_fill_data.prev_insn[iter]));
2098 }
2099 else
2100 {
2101 /* ??? Could use register post_modify for loads. */
2102 if (! CONST_OK_FOR_I (disp))
2103 {
2104 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2105 emit_move_insn (tmp, disp_rtx);
2106 disp_rtx = tmp;
2107 }
2108 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2109 spill_fill_data.iter_reg[iter], disp_rtx));
2110 }
2111 }
2112 /* Micro-optimization: if we've created a frame pointer, it's at
2113 CFA 0, which may allow the real iterator to be initialized lower,
2114 slightly increasing parallelism. Also, if there are few saves
2115 it may eliminate the iterator entirely. */
2116 else if (disp == 0
2117 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2118 && frame_pointer_needed)
2119 {
2120 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2121 set_mem_alias_set (mem, get_varargs_alias_set ());
2122 return mem;
2123 }
2124 else
2125 {
2126 rtx seq, insn;
2127
2128 if (disp == 0)
2129 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2130 spill_fill_data.init_reg[iter]);
2131 else
2132 {
2133 start_sequence ();
2134
2135 if (! CONST_OK_FOR_I (disp))
2136 {
2137 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2138 emit_move_insn (tmp, disp_rtx);
2139 disp_rtx = tmp;
2140 }
2141
2142 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2143 spill_fill_data.init_reg[iter],
2144 disp_rtx));
2145
2146 seq = get_insns ();
2147 end_sequence ();
2148 }
2149
2150 /* Careful for being the first insn in a sequence. */
2151 if (spill_fill_data.init_after)
2152 insn = emit_insn_after (seq, spill_fill_data.init_after);
2153 else
2154 {
2155 rtx first = get_insns ();
2156 if (first)
2157 insn = emit_insn_before (seq, first);
2158 else
2159 insn = emit_insn (seq);
2160 }
2161 spill_fill_data.init_after = insn;
2162
2163 /* If DISP is 0, we may or may not have a further adjustment
2164 afterward. If we do, then the load/store insn may be modified
2165 to be a post-modify. If we don't, then this copy may be
2166 eliminated by copyprop_hardreg_forward, which makes this
2167 insn garbage, which runs afoul of the sanity check in
2168 propagate_one_insn. So mark this insn as legal to delete. */
2169 if (disp == 0)
2170 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2171 REG_NOTES (insn));
2172 }
2173
2174 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2175
2176 /* ??? Not all of the spills are for varargs, but some of them are.
2177 The rest of the spills belong in an alias set of their own. But
2178 it doesn't actually hurt to include them here. */
2179 set_mem_alias_set (mem, get_varargs_alias_set ());
2180
2181 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2182 spill_fill_data.prev_off[iter] = cfa_off;
2183
2184 if (++iter >= spill_fill_data.n_iter)
2185 iter = 0;
2186 spill_fill_data.next_iter = iter;
2187
2188 return mem;
2189 }
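/* Sketch of the fix-up performed above (register numbers are made up):
   the first save through an iterator is emitted as a plain
   "st8.spill [r14] = r4"; when the next save arrives on the same iterator,
   the code reaches back through prev_addr and rewrites the earlier address
   into a POST_MODIFY, so the pair ends up roughly as
   "st8.spill [r14] = r4, 16" followed by "st8.spill [r14] = r5", without
   the prologue expander having to know the final offsets up front.  */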
2190
2191 static void
2192 do_spill (move_fn, reg, cfa_off, frame_reg)
2193 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2194 rtx reg, frame_reg;
2195 HOST_WIDE_INT cfa_off;
2196 {
2197 int iter = spill_fill_data.next_iter;
2198 rtx mem, insn;
2199
2200 mem = spill_restore_mem (reg, cfa_off);
2201 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2202 spill_fill_data.prev_insn[iter] = insn;
2203
2204 if (frame_reg)
2205 {
2206 rtx base;
2207 HOST_WIDE_INT off;
2208
2209 RTX_FRAME_RELATED_P (insn) = 1;
2210
2211 /* Don't even pretend that the unwind code can intuit its way
2212 through a pair of interleaved post_modify iterators. Just
2213 provide the correct answer. */
2214
2215 if (frame_pointer_needed)
2216 {
2217 base = hard_frame_pointer_rtx;
2218 off = - cfa_off;
2219 }
2220 else
2221 {
2222 base = stack_pointer_rtx;
2223 off = current_frame_info.total_size - cfa_off;
2224 }
2225
2226 REG_NOTES (insn)
2227 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2228 gen_rtx_SET (VOIDmode,
2229 gen_rtx_MEM (GET_MODE (reg),
2230 plus_constant (base, off)),
2231 frame_reg),
2232 REG_NOTES (insn));
2233 }
2234 }
2235
2236 static void
2237 do_restore (move_fn, reg, cfa_off)
2238 rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2239 rtx reg;
2240 HOST_WIDE_INT cfa_off;
2241 {
2242 int iter = spill_fill_data.next_iter;
2243 rtx insn;
2244
2245 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2246 GEN_INT (cfa_off)));
2247 spill_fill_data.prev_insn[iter] = insn;
2248 }
2249
2250 /* Wrapper functions that discard the CONST_INT spill offset. These
2251 exist so that we can give gr_spill/gr_fill the offset they need and
2252 use a consistent function interface. */
2253
2254 static rtx
2255 gen_movdi_x (dest, src, offset)
2256 rtx dest, src;
2257 rtx offset ATTRIBUTE_UNUSED;
2258 {
2259 return gen_movdi (dest, src);
2260 }
2261
2262 static rtx
2263 gen_fr_spill_x (dest, src, offset)
2264 rtx dest, src;
2265 rtx offset ATTRIBUTE_UNUSED;
2266 {
2267 return gen_fr_spill (dest, src);
2268 }
2269
2270 static rtx
2271 gen_fr_restore_x (dest, src, offset)
2272 rtx dest, src;
2273 rtx offset ATTRIBUTE_UNUSED;
2274 {
2275 return gen_fr_restore (dest, src);
2276 }
2277
2278 /* Called after register allocation to add any instructions needed for the
2279 prologue. Using a prologue insn is favored compared to putting all of the
2280 instructions in output_function_prologue(), since it allows the scheduler
2281 to intermix instructions with the saves of the call-saved registers. In
2282 some cases, it might be necessary to emit a barrier instruction as the last
2283 insn to prevent such scheduling.
2284
2285 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2286 so that the debug info generation code can handle them properly.
2287
2288 The register save area is laid out like so:
2289 cfa+16
2290 [ varargs spill area ]
2291 [ fr register spill area ]
2292 [ br register spill area ]
2293 [ ar register spill area ]
2294 [ pr register spill area ]
2295 [ gr register spill area ] */
2296
2297 /* ??? Get inefficient code when the frame size is larger than can fit in an
2298 adds instruction. */
2299
2300 void
2301 ia64_expand_prologue ()
2302 {
2303 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2304 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2305 rtx reg, alt_reg;
2306
2307 ia64_compute_frame_size (get_frame_size ());
2308 last_scratch_gr_reg = 15;
2309
2310 /* If there is no epilogue, then we don't need some prologue insns.
2311 We need to avoid emitting the dead prologue insns, because flow
2312 will complain about them. */
2313 if (optimize)
2314 {
2315 edge e;
2316
2317 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2318 if ((e->flags & EDGE_FAKE) == 0
2319 && (e->flags & EDGE_FALLTHRU) != 0)
2320 break;
2321 epilogue_p = (e != NULL);
2322 }
2323 else
2324 epilogue_p = 1;
2325
2326 /* Set the local, input, and output register names. We need to do this
2327 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2328 half. If we use in/loc/out register names, then we get assembler errors
2329 in crtn.S because there is no alloc insn or regstk directive in there. */
2330 if (! TARGET_REG_NAMES)
2331 {
2332 int inputs = current_frame_info.n_input_regs;
2333 int locals = current_frame_info.n_local_regs;
2334 int outputs = current_frame_info.n_output_regs;
2335
2336 for (i = 0; i < inputs; i++)
2337 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2338 for (i = 0; i < locals; i++)
2339 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2340 for (i = 0; i < outputs; i++)
2341 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2342 }
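/* For example (purely illustrative counts): with 2 inputs, 3 locals and
   1 output, the loops above make the assembler see r32/r33 for the inputs,
   r34-r36 for the locals and r37 for the output instead of in0/in1,
   loc0-loc2 and out0, which is what alloc-less code such as crtn.S
   expects.  */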
2343
2344 /* Set the frame pointer register name. The regnum is logically loc79,
2345 but of course we'll not have allocated that many locals. Rather than
2346 worrying about renumbering the existing rtxs, we adjust the name. */
2347 /* ??? This code means that we can never use one local register when
2348 there is a frame pointer. loc79 gets wasted in this case, as it is
2349 renamed to a register that will never be used. See also the try_locals
2350 code in find_gr_spill. */
2351 if (current_frame_info.reg_fp)
2352 {
2353 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2354 reg_names[HARD_FRAME_POINTER_REGNUM]
2355 = reg_names[current_frame_info.reg_fp];
2356 reg_names[current_frame_info.reg_fp] = tmp;
2357 }
2358
2359 /* We don't need an alloc instruction if we've used no outputs or locals. */
2360 if (current_frame_info.n_local_regs == 0
2361 && current_frame_info.n_output_regs == 0
2362 && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2363 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2364 {
2365 /* If there is no alloc, but there are input registers used, then we
2366 need a .regstk directive. */
2367 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2368 ar_pfs_save_reg = NULL_RTX;
2369 }
2370 else
2371 {
2372 current_frame_info.need_regstk = 0;
2373
2374 if (current_frame_info.reg_save_ar_pfs)
2375 regno = current_frame_info.reg_save_ar_pfs;
2376 else
2377 regno = next_scratch_gr_reg ();
2378 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2379
2380 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2381 GEN_INT (current_frame_info.n_input_regs),
2382 GEN_INT (current_frame_info.n_local_regs),
2383 GEN_INT (current_frame_info.n_output_regs),
2384 GEN_INT (current_frame_info.n_rotate_regs)));
2385 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2386 }
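/* With the illustrative counts used above (2 inputs, 3 locals, 1 output,
   no rotating registers) and ar.pfs saved in, say, loc2, the insn emitted
   here would come out roughly as

	alloc r36 = ar.pfs, 2, 3, 1, 0

   where r36 is loc2 after the renaming done earlier; the exact register
   is whatever find_gr_spill or next_scratch_gr_reg handed back.  */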
2387
2388 /* Set up frame pointer, stack pointer, and spill iterators. */
2389
2390 n_varargs = cfun->machine->n_varargs;
2391 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2392 stack_pointer_rtx, 0);
2393
2394 if (frame_pointer_needed)
2395 {
2396 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2397 RTX_FRAME_RELATED_P (insn) = 1;
2398 }
2399
2400 if (current_frame_info.total_size != 0)
2401 {
2402 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2403 rtx offset;
2404
2405 if (CONST_OK_FOR_I (- current_frame_info.total_size))
2406 offset = frame_size_rtx;
2407 else
2408 {
2409 regno = next_scratch_gr_reg ();
2410 offset = gen_rtx_REG (DImode, regno);
2411 emit_move_insn (offset, frame_size_rtx);
2412 }
2413
2414 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2415 stack_pointer_rtx, offset));
2416
2417 if (! frame_pointer_needed)
2418 {
2419 RTX_FRAME_RELATED_P (insn) = 1;
2420 if (GET_CODE (offset) != CONST_INT)
2421 {
2422 REG_NOTES (insn)
2423 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2424 gen_rtx_SET (VOIDmode,
2425 stack_pointer_rtx,
2426 gen_rtx_PLUS (DImode,
2427 stack_pointer_rtx,
2428 frame_size_rtx)),
2429 REG_NOTES (insn));
2430 }
2431 }
2432
2433 /* ??? At this point we must generate a magic insn that appears to
2434 modify the stack pointer, the frame pointer, and all spill
2435 iterators. This would allow the most scheduling freedom. For
2436 now, just hard stop. */
2437 emit_insn (gen_blockage ());
2438 }
2439
2440 /* Must copy out ar.unat before doing any integer spills. */
2441 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2442 {
2443 if (current_frame_info.reg_save_ar_unat)
2444 ar_unat_save_reg
2445 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2446 else
2447 {
2448 alt_regno = next_scratch_gr_reg ();
2449 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2450 current_frame_info.gr_used_mask |= 1 << alt_regno;
2451 }
2452
2453 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2454 insn = emit_move_insn (ar_unat_save_reg, reg);
2455 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2456
2457 /* Even if we're not going to generate an epilogue, we still
2458 need to save the register so that EH works. */
2459 if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2460 emit_insn (gen_prologue_use (ar_unat_save_reg));
2461 }
2462 else
2463 ar_unat_save_reg = NULL_RTX;
2464
2465 /* Spill all varargs registers. Do this before spilling any GR registers,
2466 since we want the UNAT bits for the GR registers to override the UNAT
2467 bits from varargs, which we don't care about. */
2468
2469 cfa_off = -16;
2470 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2471 {
2472 reg = gen_rtx_REG (DImode, regno);
2473 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2474 }
2475
2476 /* Locate the bottom of the register save area. */
2477 cfa_off = (current_frame_info.spill_cfa_off
2478 + current_frame_info.spill_size
2479 + current_frame_info.extra_spill_size);
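/* Worked example of the bookkeeping (hypothetical sizes): with
   spill_cfa_off = -16, spill_size = 32 and extra_spill_size = 8, cfa_off
   starts at 24 here.  The pr/ar saves below consume the 8 bytes of
   extra_spill_size, so the sanity check before the GR spills sees
   cfa_off == -16 + 32 == 16, and after the 32 bytes of gr/br/fr saves the
   final check sees cfa_off == spill_cfa_off == -16.  */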
2480
2481 /* Save the predicate register block either in a register or in memory. */
2482 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2483 {
2484 reg = gen_rtx_REG (DImode, PR_REG (0));
2485 if (current_frame_info.reg_save_pr != 0)
2486 {
2487 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2488 insn = emit_move_insn (alt_reg, reg);
2489
2490 /* ??? Denote pr spill/fill by a DImode move that modifies all
2491 64 hard registers. */
2492 RTX_FRAME_RELATED_P (insn) = 1;
2493 REG_NOTES (insn)
2494 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2495 gen_rtx_SET (VOIDmode, alt_reg, reg),
2496 REG_NOTES (insn));
2497
2498 /* Even if we're not going to generate an epilogue, we still
2499 need to save the register so that EH works. */
2500 if (! epilogue_p)
2501 emit_insn (gen_prologue_use (alt_reg));
2502 }
2503 else
2504 {
2505 alt_regno = next_scratch_gr_reg ();
2506 alt_reg = gen_rtx_REG (DImode, alt_regno);
2507 insn = emit_move_insn (alt_reg, reg);
2508 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2509 cfa_off -= 8;
2510 }
2511 }
2512
2513 /* Handle AR regs in numerical order. All of them get special handling. */
2514 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2515 && current_frame_info.reg_save_ar_unat == 0)
2516 {
2517 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2518 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2519 cfa_off -= 8;
2520 }
2521
2522 /* The alloc insn already copied ar.pfs into a general register. The
2523 only thing we have to do now is copy that register to a stack slot
2524 if we'd not allocated a local register for the job. */
2525 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2526 && current_frame_info.reg_save_ar_pfs == 0)
2527 {
2528 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2529 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2530 cfa_off -= 8;
2531 }
2532
2533 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2534 {
2535 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2536 if (current_frame_info.reg_save_ar_lc != 0)
2537 {
2538 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2539 insn = emit_move_insn (alt_reg, reg);
2540 RTX_FRAME_RELATED_P (insn) = 1;
2541
2542 /* Even if we're not going to generate an epilogue, we still
2543 need to save the register so that EH works. */
2544 if (! epilogue_p)
2545 emit_insn (gen_prologue_use (alt_reg));
2546 }
2547 else
2548 {
2549 alt_regno = next_scratch_gr_reg ();
2550 alt_reg = gen_rtx_REG (DImode, alt_regno);
2551 emit_move_insn (alt_reg, reg);
2552 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2553 cfa_off -= 8;
2554 }
2555 }
2556
2557 if (current_frame_info.reg_save_gp)
2558 {
2559 insn = emit_move_insn (gen_rtx_REG (DImode,
2560 current_frame_info.reg_save_gp),
2561 pic_offset_table_rtx);
2562 /* We don't know for sure yet if this is actually needed, since
2563 we've not split the PIC call patterns. If all of the calls
2564 are indirect, and not followed by any uses of the gp, then
2565 this save is dead. Allow it to go away. */
2566 REG_NOTES (insn)
2567 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2568 }
2569
2570 /* We should now be at the base of the gr/br/fr spill area. */
2571 if (cfa_off != (current_frame_info.spill_cfa_off
2572 + current_frame_info.spill_size))
2573 abort ();
2574
2575 /* Spill all general registers. */
2576 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2577 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2578 {
2579 reg = gen_rtx_REG (DImode, regno);
2580 do_spill (gen_gr_spill, reg, cfa_off, reg);
2581 cfa_off -= 8;
2582 }
2583
2584 /* Handle BR0 specially -- it may be getting stored permanently in
2585 some GR register. */
2586 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2587 {
2588 reg = gen_rtx_REG (DImode, BR_REG (0));
2589 if (current_frame_info.reg_save_b0 != 0)
2590 {
2591 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2592 insn = emit_move_insn (alt_reg, reg);
2593 RTX_FRAME_RELATED_P (insn) = 1;
2594
2595 /* Even if we're not going to generate an epilogue, we still
2596 need to save the register so that EH works. */
2597 if (! epilogue_p)
2598 emit_insn (gen_prologue_use (alt_reg));
2599 }
2600 else
2601 {
2602 alt_regno = next_scratch_gr_reg ();
2603 alt_reg = gen_rtx_REG (DImode, alt_regno);
2604 emit_move_insn (alt_reg, reg);
2605 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2606 cfa_off -= 8;
2607 }
2608 }
2609
2610 /* Spill the rest of the BR registers. */
2611 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2612 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2613 {
2614 alt_regno = next_scratch_gr_reg ();
2615 alt_reg = gen_rtx_REG (DImode, alt_regno);
2616 reg = gen_rtx_REG (DImode, regno);
2617 emit_move_insn (alt_reg, reg);
2618 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2619 cfa_off -= 8;
2620 }
2621
2622 /* Align the frame and spill all FR registers. */
2623 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2624 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2625 {
2626 if (cfa_off & 15)
2627 abort ();
2628 reg = gen_rtx_REG (TFmode, regno);
2629 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2630 cfa_off -= 16;
2631 }
2632
2633 if (cfa_off != current_frame_info.spill_cfa_off)
2634 abort ();
2635
2636 finish_spill_pointers ();
2637 }
2638
2639 /* Called after register allocation to add any instructions needed for the
2640 epilogue. Using an epilogue insn is favored compared to putting all of the
2641 instructions in output_function_epilogue(), since it allows the scheduler
2642 to intermix instructions with the restores of the call-saved registers. In
2643 some cases, it might be necessary to emit a barrier instruction as the last
2644 insn to prevent such scheduling. */
2645
2646 void
2647 ia64_expand_epilogue (sibcall_p)
2648 int sibcall_p;
2649 {
2650 rtx insn, reg, alt_reg, ar_unat_save_reg;
2651 int regno, alt_regno, cfa_off;
2652
2653 ia64_compute_frame_size (get_frame_size ());
2654
2655 /* If there is a frame pointer, then we use it instead of the stack
2656 pointer, so that the stack pointer does not need to be valid when
2657 the epilogue starts. See EXIT_IGNORE_STACK. */
2658 if (frame_pointer_needed)
2659 setup_spill_pointers (current_frame_info.n_spilled,
2660 hard_frame_pointer_rtx, 0);
2661 else
2662 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2663 current_frame_info.total_size);
2664
2665 if (current_frame_info.total_size != 0)
2666 {
2667 /* ??? At this point we must generate a magic insn that appears to
2668 modify the spill iterators and the frame pointer. This would
2669 allow the most scheduling freedom. For now, just hard stop. */
2670 emit_insn (gen_blockage ());
2671 }
2672
2673 /* Locate the bottom of the register save area. */
2674 cfa_off = (current_frame_info.spill_cfa_off
2675 + current_frame_info.spill_size
2676 + current_frame_info.extra_spill_size);
2677
2678 /* Restore the predicate registers. */
2679 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2680 {
2681 if (current_frame_info.reg_save_pr != 0)
2682 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2683 else
2684 {
2685 alt_regno = next_scratch_gr_reg ();
2686 alt_reg = gen_rtx_REG (DImode, alt_regno);
2687 do_restore (gen_movdi_x, alt_reg, cfa_off);
2688 cfa_off -= 8;
2689 }
2690 reg = gen_rtx_REG (DImode, PR_REG (0));
2691 emit_move_insn (reg, alt_reg);
2692 }
2693
2694 /* Restore the application registers. */
2695
2696 /* Load the saved unat from the stack, but do not restore it until
2697 after the GRs have been restored. */
2698 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2699 {
2700 if (current_frame_info.reg_save_ar_unat != 0)
2701 ar_unat_save_reg
2702 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2703 else
2704 {
2705 alt_regno = next_scratch_gr_reg ();
2706 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2707 current_frame_info.gr_used_mask |= 1 << alt_regno;
2708 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2709 cfa_off -= 8;
2710 }
2711 }
2712 else
2713 ar_unat_save_reg = NULL_RTX;
2714
2715 if (current_frame_info.reg_save_ar_pfs != 0)
2716 {
2717 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2718 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2719 emit_move_insn (reg, alt_reg);
2720 }
2721 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2722 {
2723 alt_regno = next_scratch_gr_reg ();
2724 alt_reg = gen_rtx_REG (DImode, alt_regno);
2725 do_restore (gen_movdi_x, alt_reg, cfa_off);
2726 cfa_off -= 8;
2727 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2728 emit_move_insn (reg, alt_reg);
2729 }
2730
2731 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2732 {
2733 if (current_frame_info.reg_save_ar_lc != 0)
2734 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2735 else
2736 {
2737 alt_regno = next_scratch_gr_reg ();
2738 alt_reg = gen_rtx_REG (DImode, alt_regno);
2739 do_restore (gen_movdi_x, alt_reg, cfa_off);
2740 cfa_off -= 8;
2741 }
2742 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2743 emit_move_insn (reg, alt_reg);
2744 }
2745
2746 /* We should now be at the base of the gr/br/fr spill area. */
2747 if (cfa_off != (current_frame_info.spill_cfa_off
2748 + current_frame_info.spill_size))
2749 abort ();
2750
2751 /* The GP may be stored on the stack in the prologue, but it's
2752 never restored in the epilogue. Skip the stack slot. */
2753 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2754 cfa_off -= 8;
2755
2756 /* Restore all general registers. */
2757 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2758 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2759 {
2760 reg = gen_rtx_REG (DImode, regno);
2761 do_restore (gen_gr_restore, reg, cfa_off);
2762 cfa_off -= 8;
2763 }
2764
2765 /* Restore the branch registers. Handle B0 specially, as it may
2766 have gotten stored in some GR register. */
2767 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2768 {
2769 if (current_frame_info.reg_save_b0 != 0)
2770 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2771 else
2772 {
2773 alt_regno = next_scratch_gr_reg ();
2774 alt_reg = gen_rtx_REG (DImode, alt_regno);
2775 do_restore (gen_movdi_x, alt_reg, cfa_off);
2776 cfa_off -= 8;
2777 }
2778 reg = gen_rtx_REG (DImode, BR_REG (0));
2779 emit_move_insn (reg, alt_reg);
2780 }
2781
2782 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2783 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2784 {
2785 alt_regno = next_scratch_gr_reg ();
2786 alt_reg = gen_rtx_REG (DImode, alt_regno);
2787 do_restore (gen_movdi_x, alt_reg, cfa_off);
2788 cfa_off -= 8;
2789 reg = gen_rtx_REG (DImode, regno);
2790 emit_move_insn (reg, alt_reg);
2791 }
2792
2793 /* Restore floating point registers. */
2794 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2795 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2796 {
2797 if (cfa_off & 15)
2798 abort ();
2799 reg = gen_rtx_REG (TFmode, regno);
2800 do_restore (gen_fr_restore_x, reg, cfa_off);
2801 cfa_off -= 16;
2802 }
2803
2804 /* Restore ar.unat for real. */
2805 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2806 {
2807 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2808 emit_move_insn (reg, ar_unat_save_reg);
2809 }
2810
2811 if (cfa_off != current_frame_info.spill_cfa_off)
2812 abort ();
2813
2814 finish_spill_pointers ();
2815
2816 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2817 {
2818 /* ??? At this point we must generate a magic insn that appears to
2819 modify the spill iterators, the stack pointer, and the frame
2820 pointer. This would allow the most scheduling freedom. For now,
2821 just hard stop. */
2822 emit_insn (gen_blockage ());
2823 }
2824
2825 if (cfun->machine->ia64_eh_epilogue_sp)
2826 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2827 else if (frame_pointer_needed)
2828 {
2829 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2830 RTX_FRAME_RELATED_P (insn) = 1;
2831 }
2832 else if (current_frame_info.total_size)
2833 {
2834 rtx offset, frame_size_rtx;
2835
2836 frame_size_rtx = GEN_INT (current_frame_info.total_size);
2837 if (CONST_OK_FOR_I (current_frame_info.total_size))
2838 offset = frame_size_rtx;
2839 else
2840 {
2841 regno = next_scratch_gr_reg ();
2842 offset = gen_rtx_REG (DImode, regno);
2843 emit_move_insn (offset, frame_size_rtx);
2844 }
2845
2846 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2847 offset));
2848
2849 RTX_FRAME_RELATED_P (insn) = 1;
2850 if (GET_CODE (offset) != CONST_INT)
2851 {
2852 REG_NOTES (insn)
2853 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2854 gen_rtx_SET (VOIDmode,
2855 stack_pointer_rtx,
2856 gen_rtx_PLUS (DImode,
2857 stack_pointer_rtx,
2858 frame_size_rtx)),
2859 REG_NOTES (insn));
2860 }
2861 }
2862
2863 if (cfun->machine->ia64_eh_epilogue_bsp)
2864 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2865
2866 if (! sibcall_p)
2867 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2868 else
2869 {
2870 int fp = GR_REG (2);
2871 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the
2872 first available call-clobbered register. If there was a frame_pointer
2873 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2874 so we have to make sure we're using the string "r2" when emitting
2875 the register name for the assembler. */
2876 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2877 fp = HARD_FRAME_POINTER_REGNUM;
2878
2879 /* We must emit an alloc to force the input registers to become output
2880 registers. Otherwise, if the callee tries to pass its parameters
2881 through to another call without an intervening alloc, then these
2882 values get lost. */
2883 /* ??? We don't need to preserve all input registers. We only need to
2884 preserve those input registers used as arguments to the sibling call.
2885 It is unclear how to compute that number here. */
2886 if (current_frame_info.n_input_regs != 0)
2887 {
2888 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
2889 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2890 const0_rtx, const0_rtx,
2891 n_inputs, const0_rtx));
2892 RTX_FRAME_RELATED_P (insn) = 1;
2893 }
2894 }
2895 }
2896
2897 /* Return 1 if br.ret can do all the work required to return from a
2898 function. */
2899
2900 int
2901 ia64_direct_return ()
2902 {
2903 if (reload_completed && ! frame_pointer_needed)
2904 {
2905 ia64_compute_frame_size (get_frame_size ());
2906
2907 return (current_frame_info.total_size == 0
2908 && current_frame_info.n_spilled == 0
2909 && current_frame_info.reg_save_b0 == 0
2910 && current_frame_info.reg_save_pr == 0
2911 && current_frame_info.reg_save_ar_pfs == 0
2912 && current_frame_info.reg_save_ar_unat == 0
2913 && current_frame_info.reg_save_ar_lc == 0);
2914 }
2915 return 0;
2916 }
2917
2918 /* Return the magic cookie that we use to hold the return address
2919 during early compilation. */
2920
2921 rtx
2922 ia64_return_addr_rtx (count, frame)
2923 HOST_WIDE_INT count;
2924 rtx frame ATTRIBUTE_UNUSED;
2925 {
2926 if (count != 0)
2927 return NULL;
2928 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
2929 }
2930
2931 /* Split this value after reload, now that we know where the return
2932 address is saved. */
2933
2934 void
2935 ia64_split_return_addr_rtx (dest)
2936 rtx dest;
2937 {
2938 rtx src;
2939
2940 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2941 {
2942 if (current_frame_info.reg_save_b0 != 0)
2943 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2944 else
2945 {
2946 HOST_WIDE_INT off;
2947 unsigned int regno;
2948
2949 /* Compute offset from CFA for BR0. */
2950 /* ??? Must be kept in sync with ia64_expand_prologue. */
2951 off = (current_frame_info.spill_cfa_off
2952 + current_frame_info.spill_size);
2953 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2954 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2955 off -= 8;
2956
2957 /* Convert CFA offset to a register based offset. */
2958 if (frame_pointer_needed)
2959 src = hard_frame_pointer_rtx;
2960 else
2961 {
2962 src = stack_pointer_rtx;
2963 off += current_frame_info.total_size;
2964 }
2965
2966 /* Load address into scratch register. */
2967 if (CONST_OK_FOR_I (off))
2968 emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
2969 else
2970 {
2971 emit_move_insn (dest, GEN_INT (off));
2972 emit_insn (gen_adddi3 (dest, src, dest));
2973 }
2974
2975 src = gen_rtx_MEM (Pmode, dest);
2976 }
2977 }
2978 else
2979 src = gen_rtx_REG (DImode, BR_REG (0));
2980
2981 emit_move_insn (dest, src);
2982 }
2983
2984 int
2985 ia64_hard_regno_rename_ok (from, to)
2986 int from;
2987 int to;
2988 {
2989 /* Don't clobber any of the registers we reserved for the prologue. */
2990 if (to == current_frame_info.reg_fp
2991 || to == current_frame_info.reg_save_b0
2992 || to == current_frame_info.reg_save_pr
2993 || to == current_frame_info.reg_save_ar_pfs
2994 || to == current_frame_info.reg_save_ar_unat
2995 || to == current_frame_info.reg_save_ar_lc)
2996 return 0;
2997
2998 if (from == current_frame_info.reg_fp
2999 || from == current_frame_info.reg_save_b0
3000 || from == current_frame_info.reg_save_pr
3001 || from == current_frame_info.reg_save_ar_pfs
3002 || from == current_frame_info.reg_save_ar_unat
3003 || from == current_frame_info.reg_save_ar_lc)
3004 return 0;
3005
3006 /* Don't use output registers outside the register frame. */
3007 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3008 return 0;
3009
3010 /* Retain even/oddness on predicate register pairs. */
3011 if (PR_REGNO_P (from) && PR_REGNO_P (to))
3012 return (from & 1) == (to & 1);
3013
3014 return 1;
3015 }
3016
3017 /* Target hook for assembling integer objects. Handle word-sized
3018 aligned objects and detect the cases when @fptr is needed. */
3019
3020 static bool
3021 ia64_assemble_integer (x, size, aligned_p)
3022 rtx x;
3023 unsigned int size;
3024 int aligned_p;
3025 {
3026 if (size == (TARGET_ILP32 ? 4 : 8)
3027 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3028 && GET_CODE (x) == SYMBOL_REF
3029 && SYMBOL_REF_FLAG (x))
3030 {
3031 static const char * const directive[2][2] = {
3032 /* 64-bit pointer */ /* 32-bit pointer */
3033 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
3034 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
3035 };
3036 fputs (directive[aligned_p != 0][TARGET_ILP32 != 0], asm_out_file);
3037 output_addr_const (asm_out_file, x);
3038 fputs (")\n", asm_out_file);
3039 return true;
3040 }
3041 return default_assemble_integer (x, size, aligned_p);
3042 }
3043
3044 /* Emit the function prologue. */
3045
3046 static void
3047 ia64_output_function_prologue (file, size)
3048 FILE *file;
3049 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3050 {
3051 int mask, grsave, grsave_prev;
3052
3053 if (current_frame_info.need_regstk)
3054 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3055 current_frame_info.n_input_regs,
3056 current_frame_info.n_local_regs,
3057 current_frame_info.n_output_regs,
3058 current_frame_info.n_rotate_regs);
3059
3060 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3061 return;
3062
3063 /* Emit the .prologue directive. */
3064
3065 mask = 0;
3066 grsave = grsave_prev = 0;
3067 if (current_frame_info.reg_save_b0 != 0)
3068 {
3069 mask |= 8;
3070 grsave = grsave_prev = current_frame_info.reg_save_b0;
3071 }
3072 if (current_frame_info.reg_save_ar_pfs != 0
3073 && (grsave_prev == 0
3074 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3075 {
3076 mask |= 4;
3077 if (grsave_prev == 0)
3078 grsave = current_frame_info.reg_save_ar_pfs;
3079 grsave_prev = current_frame_info.reg_save_ar_pfs;
3080 }
3081 if (current_frame_info.reg_fp != 0
3082 && (grsave_prev == 0
3083 || current_frame_info.reg_fp == grsave_prev + 1))
3084 {
3085 mask |= 2;
3086 if (grsave_prev == 0)
3087 grsave = HARD_FRAME_POINTER_REGNUM;
3088 grsave_prev = current_frame_info.reg_fp;
3089 }
3090 if (current_frame_info.reg_save_pr != 0
3091 && (grsave_prev == 0
3092 || current_frame_info.reg_save_pr == grsave_prev + 1))
3093 {
3094 mask |= 1;
3095 if (grsave_prev == 0)
3096 grsave = current_frame_info.reg_save_pr;
3097 }
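/* The mask built above encodes which of the four special values landed in
   consecutive GRs: 8 for b0 (rp), 4 for ar.pfs, 2 for the frame pointer
   and 1 for pr, with GRSAVE naming the first register of the group.  As a
   hypothetical example, if b0, ar.pfs and fp were saved in three
   consecutive GRs but pr was spilled to memory, mask would be
   8 | 4 | 2 == 14 and the directive would read ".prologue 14, <regno>",
   with <regno> the dbx number of the first save register.  */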
3098
3099 if (mask)
3100 fprintf (file, "\t.prologue %d, %d\n", mask,
3101 ia64_dbx_register_number (grsave));
3102 else
3103 fputs ("\t.prologue\n", file);
3104
3105 /* Emit a .spill directive, if necessary, to relocate the base of
3106 the register spill area. */
3107 if (current_frame_info.spill_cfa_off != -16)
3108 fprintf (file, "\t.spill %ld\n",
3109 (long) (current_frame_info.spill_cfa_off
3110 + current_frame_info.spill_size));
3111 }
3112
3113 /* Emit the .body directive at the scheduled end of the prologue. */
3114
3115 static void
3116 ia64_output_function_end_prologue (file)
3117 FILE *file;
3118 {
3119 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3120 return;
3121
3122 fputs ("\t.body\n", file);
3123 }
3124
3125 /* Emit the function epilogue. */
3126
3127 static void
3128 ia64_output_function_epilogue (file, size)
3129 FILE *file ATTRIBUTE_UNUSED;
3130 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3131 {
3132 int i;
3133
3134 if (current_frame_info.reg_fp)
3135 {
3136 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3137 reg_names[HARD_FRAME_POINTER_REGNUM]
3138 = reg_names[current_frame_info.reg_fp];
3139 reg_names[current_frame_info.reg_fp] = tmp;
3140 }
3141 if (! TARGET_REG_NAMES)
3142 {
3143 for (i = 0; i < current_frame_info.n_input_regs; i++)
3144 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3145 for (i = 0; i < current_frame_info.n_local_regs; i++)
3146 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3147 for (i = 0; i < current_frame_info.n_output_regs; i++)
3148 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3149 }
3150
3151 current_frame_info.initialized = 0;
3152 }
3153
3154 int
3155 ia64_dbx_register_number (regno)
3156 int regno;
3157 {
3158 /* In ia64_expand_prologue we quite literally renamed the frame pointer
3159 from its home at loc79 to something inside the register frame. We
3160 must perform the same renumbering here for the debug info. */
3161 if (current_frame_info.reg_fp)
3162 {
3163 if (regno == HARD_FRAME_POINTER_REGNUM)
3164 regno = current_frame_info.reg_fp;
3165 else if (regno == current_frame_info.reg_fp)
3166 regno = HARD_FRAME_POINTER_REGNUM;
3167 }
3168
3169 if (IN_REGNO_P (regno))
3170 return 32 + regno - IN_REG (0);
3171 else if (LOC_REGNO_P (regno))
3172 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3173 else if (OUT_REGNO_P (regno))
3174 return (32 + current_frame_info.n_input_regs
3175 + current_frame_info.n_local_regs + regno - OUT_REG (0));
3176 else
3177 return regno;
3178 }
3179
3180 void
3181 ia64_initialize_trampoline (addr, fnaddr, static_chain)
3182 rtx addr, fnaddr, static_chain;
3183 {
3184 rtx addr_reg, eight = GEN_INT (8);
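/* Sketch of the 32 byte block the moves below build (assuming 64-bit
   Pmode; the word numbering is just for illustration):
	word 0 at ADDR:		address of __ia64_trampoline
	word 1 at ADDR+8:	ADDR + 16 (the fake descriptor's gp)
	word 2 at ADDR+16:	FNADDR, the real target descriptor
	word 3 at ADDR+24:	STATIC_CHAIN
   Calling through the fake descriptor at ADDR enters __ia64_trampoline
   with gp pointing at words 2-3, from which it can pick up the real
   descriptor and the static chain.  */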
3185
3186 /* Load up our iterator. */
3187 addr_reg = gen_reg_rtx (Pmode);
3188 emit_move_insn (addr_reg, addr);
3189
3190 /* The first two words are the fake descriptor:
3191 __ia64_trampoline, ADDR+16. */
3192 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3193 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3194 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3195
3196 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3197 copy_to_reg (plus_constant (addr, 16)));
3198 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3199
3200 /* The third word is the target descriptor. */
3201 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3202 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3203
3204 /* The fourth word is the static chain. */
3205 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3206 }
3207
3208 /* Do any needed setup for a variadic function. CUM has not been updated
3209 for the last named argument which has type TYPE and mode MODE.
3210
3211 We generate the actual spill instructions during prologue generation. */
3212
3213 void
3214 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
3215 CUMULATIVE_ARGS cum;
3216 int int_mode;
3217 tree type;
3218 int * pretend_size;
3219 int second_time ATTRIBUTE_UNUSED;
3220 {
3221 /* Skip the current argument. */
3222 ia64_function_arg_advance (&cum, int_mode, type, 1);
3223
3224 if (cum.words < MAX_ARGUMENT_SLOTS)
3225 {
3226 int n = MAX_ARGUMENT_SLOTS - cum.words;
3227 *pretend_size = n * UNITS_PER_WORD;
3228 cfun->machine->n_varargs = n;
3229 }
3230 }
3231
3232 /* Check whether TYPE is a homogeneous floating point aggregate. If
3233 it is, return the mode of the floating point type that appears
3234 in all leaves. If it is not, return VOIDmode.
3235
3236 An aggregate is a homogeneous floating point aggregate if all
3237 fields/elements in it have the same floating point type (e.g.,
3238 SFmode). 128-bit quad-precision floats are excluded. */
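/* For example, struct { float x, y, z; } and double[4] are HFAs (element
   modes SFmode and DFmode respectively), while struct { double d; float f; }
   and struct { float f; int i; } are not, since their leaves do not share
   a single floating point mode.  */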
3239
3240 static enum machine_mode
3241 hfa_element_mode (type, nested)
3242 tree type;
3243 int nested;
3244 {
3245 enum machine_mode element_mode = VOIDmode;
3246 enum machine_mode mode;
3247 enum tree_code code = TREE_CODE (type);
3248 int know_element_mode = 0;
3249 tree t;
3250
3251 switch (code)
3252 {
3253 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
3254 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE:
3255 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
3256 case FILE_TYPE: case SET_TYPE: case LANG_TYPE:
3257 case FUNCTION_TYPE:
3258 return VOIDmode;
3259
3260 /* Fortran complex types are supposed to be HFAs, so we need to handle
3261 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
3262 types though. */
3263 case COMPLEX_TYPE:
3264 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3265 && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT))
3266 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
3267 * BITS_PER_UNIT, MODE_FLOAT, 0);
3268 else
3269 return VOIDmode;
3270
3271 case REAL_TYPE:
3272 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3273 mode if this is contained within an aggregate. */
3274 if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT))
3275 return TYPE_MODE (type);
3276 else
3277 return VOIDmode;
3278
3279 case ARRAY_TYPE:
3280 return hfa_element_mode (TREE_TYPE (type), 1);
3281
3282 case RECORD_TYPE:
3283 case UNION_TYPE:
3284 case QUAL_UNION_TYPE:
3285 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3286 {
3287 if (TREE_CODE (t) != FIELD_DECL)
3288 continue;
3289
3290 mode = hfa_element_mode (TREE_TYPE (t), 1);
3291 if (know_element_mode)
3292 {
3293 if (mode != element_mode)
3294 return VOIDmode;
3295 }
3296 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3297 return VOIDmode;
3298 else
3299 {
3300 know_element_mode = 1;
3301 element_mode = mode;
3302 }
3303 }
3304 return element_mode;
3305
3306 default:
3307 /* If we reach here, we probably have some front-end specific type
3308 that the backend doesn't know about. This can happen via the
3309 aggregate_value_p call in init_function_start. All we can do is
3310 ignore unknown tree types. */
3311 return VOIDmode;
3312 }
3313
3314 return VOIDmode;
3315 }
3316
3317 /* Return rtx for register where argument is passed, or zero if it is passed
3318 on the stack. */
3319
3320 /* ??? 128-bit quad-precision floats are always passed in general
3321 registers. */
3322
3323 rtx
3324 ia64_function_arg (cum, mode, type, named, incoming)
3325 CUMULATIVE_ARGS *cum;
3326 enum machine_mode mode;
3327 tree type;
3328 int named;
3329 int incoming;
3330 {
3331 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3332 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3333 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3334 / UNITS_PER_WORD);
3335 int offset = 0;
3336 enum machine_mode hfa_mode = VOIDmode;
3337
3338 /* Integer and float arguments larger than 8 bytes start at the next even
3339 boundary. Aggregates larger than 8 bytes start at the next even boundary
3340 if the aggregate has 16 byte alignment. Net effect is that types with
3341 alignment greater than 8 start at the next even boundary. */
3342 /* ??? The ABI does not specify how to handle aggregates with alignment from
3343 9 to 15 bytes, or greater than 16. We handle them all as if they had
3344 16 byte alignment. Such aggregates can occur only if gcc extensions are
3345 used. */
3346 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3347 : (words > 1))
3348 && (cum->words & 1))
3349 offset = 1;
3350
3351 /* If all argument slots are used, then it must go on the stack. */
3352 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3353 return 0;
3354
3355 /* Check for and handle homogeneous FP aggregates. */
3356 if (type)
3357 hfa_mode = hfa_element_mode (type, 0);
3358
3359 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3360 and unprototyped hfas are passed specially. */
3361 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3362 {
3363 rtx loc[16];
3364 int i = 0;
3365 int fp_regs = cum->fp_regs;
3366 int int_regs = cum->words + offset;
3367 int hfa_size = GET_MODE_SIZE (hfa_mode);
3368 int byte_size;
3369 int args_byte_size;
3370
3371 /* If prototyped, pass it in FR regs then GR regs.
3372 If not prototyped, pass it in both FR and GR regs.
3373
3374 If this is an SFmode aggregate, then it is possible to run out of
3375 FR regs while GR regs are still left. In that case, we pass the
3376 remaining part in the GR regs. */
3377
3378 /* Fill the FP regs. We do this always. We stop if we reach the end
3379 of the argument, the last FP register, or the last argument slot. */
3380
3381 byte_size = ((mode == BLKmode)
3382 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3383 args_byte_size = int_regs * UNITS_PER_WORD;
3384 offset = 0;
3385 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3386 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3387 {
3388 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3389 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3390 + fp_regs)),
3391 GEN_INT (offset));
3392 offset += hfa_size;
3393 args_byte_size += hfa_size;
3394 fp_regs++;
3395 }
3396
3397 /* If no prototype, then the whole thing must go in GR regs. */
3398 if (! cum->prototype)
3399 offset = 0;
3400 /* If this is an SFmode aggregate, then we might have some left over
3401 that needs to go in GR regs. */
3402 else if (byte_size != offset)
3403 int_regs += offset / UNITS_PER_WORD;
3404
3405 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
3406
3407 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3408 {
3409 enum machine_mode gr_mode = DImode;
3410
3411 /* If we have an odd 4 byte hunk because we ran out of FR regs,
3412 then this goes in a GR reg left adjusted/little endian, right
3413 adjusted/big endian. */
3414 /* ??? Currently this is handled wrong, because 4-byte hunks are
3415 always right adjusted/little endian. */
3416 if (offset & 0x4)
3417 gr_mode = SImode;
3418 /* If we have an even 4 byte hunk because the aggregate is a
3419 multiple of 4 bytes in size, then this goes in a GR reg right
3420 adjusted/little endian. */
3421 else if (byte_size - offset == 4)
3422 gr_mode = SImode;
3423 /* Complex floats need to have float mode. */
3424 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3425 gr_mode = hfa_mode;
3426
3427 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3428 gen_rtx_REG (gr_mode, (basereg
3429 + int_regs)),
3430 GEN_INT (offset));
3431 offset += GET_MODE_SIZE (gr_mode);
3432 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3433 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3434 }
3435
3436 /* If we ended up using just one location, just return that one loc, but
3437 change the mode back to the argument mode. */
3438 if (i == 1)
3439 return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0)));
3440 else
3441 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3442 }
3443
3444 /* Integral and aggregates go in general registers. If we have run out of
3445 FR registers, then FP values must also go in general registers. This can
3446 happen when we have a SFmode HFA. */
3447 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3448 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3449 {
3450 int byte_size = ((mode == BLKmode)
3451 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3452 if (BYTES_BIG_ENDIAN
3453 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3454 && byte_size < UNITS_PER_WORD
3455 && byte_size > 0)
3456 {
3457 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3458 gen_rtx_REG (DImode,
3459 (basereg + cum->words
3460 + offset)),
3461 const0_rtx);
3462 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3463 }
3464 else
3465 return gen_rtx_REG (mode, basereg + cum->words + offset);
3466
3467 }
3468
3469 /* If there is a prototype, then FP values go in a FR register when
3470 named, and in a GR register when unnamed. */
3471 else if (cum->prototype)
3472 {
3473 if (! named)
3474 return gen_rtx_REG (mode, basereg + cum->words + offset);
3475 else
3476 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3477 }
3478 /* If there is no prototype, then FP values go in both FR and GR
3479 registers. */
3480 else
3481 {
3482 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3483 gen_rtx_REG (mode, (FR_ARG_FIRST
3484 + cum->fp_regs)),
3485 const0_rtx);
3486 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3487 gen_rtx_REG (mode,
3488 (basereg + cum->words
3489 + offset)),
3490 const0_rtx);
3491
3492 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3493 }
3494 }
3495
3496 /* Return number of words, at the beginning of the argument, that must be
3497 put in registers. 0 if the argument is entirely in registers or entirely
3498 in memory. */
3499
3500 int
3501 ia64_function_arg_partial_nregs (cum, mode, type, named)
3502 CUMULATIVE_ARGS *cum;
3503 enum machine_mode mode;
3504 tree type;
3505 int named ATTRIBUTE_UNUSED;
3506 {
3507 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3508 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3509 / UNITS_PER_WORD);
3510 int offset = 0;
3511
3512 /* Arguments with alignment larger than 8 bytes start at the next even
3513 boundary. */
3514 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3515 : (words > 1))
3516 && (cum->words & 1))
3517 offset = 1;
3518
3519 /* If all argument slots are used, then it must go on the stack. */
3520 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3521 return 0;
3522
3523 /* It doesn't matter whether the argument goes in FR or GR regs. If
3524 it fits within the 8 argument slots, then it goes entirely in
3525 registers. If it extends past the last argument slot, then the rest
3526 goes on the stack. */
3527
3528 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3529 return 0;
3530
3531 return MAX_ARGUMENT_SLOTS - cum->words - offset;
3532 }
3533
3534 /* Update CUM to point after this argument. This is patterned after
3535 ia64_function_arg. */
3536
3537 void
3538 ia64_function_arg_advance (cum, mode, type, named)
3539 CUMULATIVE_ARGS *cum;
3540 enum machine_mode mode;
3541 tree type;
3542 int named;
3543 {
3544 int words = (((mode == BLKmode ? int_size_in_bytes (type)
3545 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3546 / UNITS_PER_WORD);
3547 int offset = 0;
3548 enum machine_mode hfa_mode = VOIDmode;
3549
3550 /* If all arg slots are already full, then there is nothing to do. */
3551 if (cum->words >= MAX_ARGUMENT_SLOTS)
3552 return;
3553
3554 /* Arguments with alignment larger than 8 bytes start at the next even
3555 boundary. */
3556 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3557 : (words > 1))
3558 && (cum->words & 1))
3559 offset = 1;
3560
3561 cum->words += words + offset;
3562
3563 /* Check for and handle homogeneous FP aggregates. */
3564 if (type)
3565 hfa_mode = hfa_element_mode (type, 0);
3566
3567 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
3568 and unprototyped hfas are passed specially. */
3569 if (hfa_mode != VOIDmode && (! cum->prototype || named))
3570 {
3571 int fp_regs = cum->fp_regs;
3572 /* This is the original value of cum->words + offset. */
3573 int int_regs = cum->words - words;
3574 int hfa_size = GET_MODE_SIZE (hfa_mode);
3575 int byte_size;
3576 int args_byte_size;
3577
3578 /* If prototyped, pass it in FR regs then GR regs.
3579 If not prototyped, pass it in both FR and GR regs.
3580
3581 If this is an SFmode aggregate, then it is possible to run out of
3582 FR regs while GR regs are still left. In that case, we pass the
3583 remaining part in the GR regs. */
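/* For instance, an HFA of ten floats occupies only five 8 byte argument
   slots but wants ten FP registers; after the eighth SFmode piece the FP
   argument registers run out and the rest is passed in GR regs.  */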
3584
3585 /* Fill the FP regs. We do this always. We stop if we reach the end
3586 of the argument, the last FP register, or the last argument slot. */
3587
3588 byte_size = ((mode == BLKmode)
3589 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3590 args_byte_size = int_regs * UNITS_PER_WORD;
3591 offset = 0;
3592 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3593 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3594 {
3595 offset += hfa_size;
3596 args_byte_size += hfa_size;
3597 fp_regs++;
3598 }
3599
3600 cum->fp_regs = fp_regs;
3601 }
3602
3603 /* Integral and aggregates go in general registers. If we have run out of
3604 FR registers, then FP values must also go in general registers. This can
3605 happen when we have an SFmode HFA. */
3606 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3607 cum->int_regs = cum->words;
3608
3609 /* If there is a prototype, then FP values go in a FR register when
3610 named, and in a GR register when unnamed. */
3611 else if (cum->prototype)
3612 {
3613 if (! named)
3614 cum->int_regs = cum->words;
3615 else
3616 /* ??? Complex types should not reach here. */
3617 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3618 }
3619 /* If there is no prototype, then FP values go in both FR and GR
3620 registers. */
3621 else
3622 {
3623 /* ??? Complex types should not reach here. */
3624 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3625 cum->int_regs = cum->words;
3626 }
3627 }
3628
3629 /* Variable sized types are passed by reference. */
3630 /* ??? At present this is a GCC extension to the IA-64 ABI. */
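/* For example, a C99 variable length array parameter has a TYPE_SIZE that
   is not an INTEGER_CST, so it is passed by reference here.  */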
3631
3632 int
3633 ia64_function_arg_pass_by_reference (cum, mode, type, named)
3634 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3635 enum machine_mode mode ATTRIBUTE_UNUSED;
3636 tree type;
3637 int named ATTRIBUTE_UNUSED;
3638 {
3639 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3640 }
3641
3642
3643 /* Implement va_arg. */
3644
3645 rtx
3646 ia64_va_arg (valist, type)
3647 tree valist, type;
3648 {
3649 tree t;
3650
3651 /* Variable sized types are passed by reference. */
3652 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3653 {
3654 rtx addr = force_reg (ptr_mode,
3655 std_expand_builtin_va_arg (valist, build_pointer_type (type)));
3656 #ifdef POINTERS_EXTEND_UNSIGNED
3657 addr = convert_memory_address (Pmode, addr);
3658 #endif
3659 return gen_rtx_MEM (ptr_mode, addr);
3660 }
3661
3662 /* Arguments with alignment larger than 8 bytes start at the next even
3663 boundary. */
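/* With UNITS_PER_WORD equal to 8, as on IA-64, the trees built below
   compute valist = (valist + 15) & -16, i.e. they round the pointer up
   to the next 16 byte boundary.  */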
3664 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3665 {
3666 t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3667 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3668 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3669 build_int_2 (-2 * UNITS_PER_WORD, -1));
3670 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3671 TREE_SIDE_EFFECTS (t) = 1;
3672 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3673 }
3674
3675 return std_expand_builtin_va_arg (valist, type);
3676 }
3677
3678 /* Return 1 if the function return value is returned in memory. Return 0 if it is
3679 in a register. */
3680
3681 int
3682 ia64_return_in_memory (valtype)
3683 tree valtype;
3684 {
3685 enum machine_mode mode;
3686 enum machine_mode hfa_mode;
3687 HOST_WIDE_INT byte_size;
3688
3689 mode = TYPE_MODE (valtype);
3690 byte_size = GET_MODE_SIZE (mode);
3691 if (mode == BLKmode)
3692 {
3693 byte_size = int_size_in_bytes (valtype);
3694 if (byte_size < 0)
3695 return 1;
3696 }
3697
3698 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
3699
3700 hfa_mode = hfa_element_mode (valtype, 0);
3701 if (hfa_mode != VOIDmode)
3702 {
3703 int hfa_size = GET_MODE_SIZE (hfa_mode);
3704
3705 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3706 return 1;
3707 else
3708 return 0;
3709 }
3710 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3711 return 1;
3712 else
3713 return 0;
3714 }
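/* For example, "struct { float f[4]; }" is an HFA of four SFmode elements
   and is returned in FP registers, whereas a 40 byte non-HFA aggregate
   exceeds UNITS_PER_WORD * MAX_INT_RETURN_SLOTS bytes (assuming the usual
   value of 4 for MAX_INT_RETURN_SLOTS) and is returned in memory.  */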
3715
3716 /* Return rtx for register that holds the function return value. */
3717
3718 rtx
3719 ia64_function_value (valtype, func)
3720 tree valtype;
3721 tree func ATTRIBUTE_UNUSED;
3722 {
3723 enum machine_mode mode;
3724 enum machine_mode hfa_mode;
3725
3726 mode = TYPE_MODE (valtype);
3727 hfa_mode = hfa_element_mode (valtype, 0);
3728
3729 if (hfa_mode != VOIDmode)
3730 {
3731 rtx loc[8];
3732 int i;
3733 int hfa_size;
3734 int byte_size;
3735 int offset;
3736
3737 hfa_size = GET_MODE_SIZE (hfa_mode);
3738 byte_size = ((mode == BLKmode)
3739 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3740 offset = 0;
3741 for (i = 0; offset < byte_size; i++)
3742 {
3743 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3744 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3745 GEN_INT (offset));
3746 offset += hfa_size;
3747 }
3748
3749 if (i == 1)
3750 return XEXP (loc[0], 0);
3751 else
3752 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3753 }
3754 else if (FLOAT_TYPE_P (valtype) &&
3755 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3756 return gen_rtx_REG (mode, FR_ARG_FIRST);
3757 else
3758 {
3759 if (BYTES_BIG_ENDIAN
3760 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3761 {
3762 rtx loc[8];
3763 int offset;
3764 int bytesize;
3765 int i;
3766
3767 offset = 0;
3768 bytesize = int_size_in_bytes (valtype);
3769 for (i = 0; offset < bytesize; i++)
3770 {
3771 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3772 gen_rtx_REG (DImode,
3773 GR_RET_FIRST + i),
3774 GEN_INT (offset));
3775 offset += UNITS_PER_WORD;
3776 }
3777 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3778 }
3779 else
3780 return gen_rtx_REG (mode, GR_RET_FIRST);
3781 }
3782 }
3783
3784 /* Print a memory address as an operand to reference that memory location. */
3785
3786 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
3787 also call this from ia64_print_operand for memory addresses. */
3788
3789 void
3790 ia64_print_operand_address (stream, address)
3791 FILE * stream ATTRIBUTE_UNUSED;
3792 rtx address ATTRIBUTE_UNUSED;
3793 {
3794 }
3795
3796 /* Print an operand to an assembler instruction.
3797 C Swap and print a comparison operator.
3798 D Print an FP comparison operator.
3799 E Print 32 - constant, for SImode shifts as extract.
3800 e Print 64 - constant, for DImode rotates.
3801 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3802 a floating point register emitted normally.
3803 I Invert a predicate register by adding 1.
3804 J Select the proper predicate register for a condition.
3805 j Select the inverse predicate register for a condition.
3806 O Append .acq for volatile load.
3807 P Postincrement of a MEM.
3808 Q Append .rel for volatile store.
3809 S Shift amount for shladd instruction.
3810 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3811 for Intel assembler.
3812 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3813 for Intel assembler.
3814 r Print register name, or constant 0 as r0. HP compatibility for
3815 Linux kernel. */
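/* As an illustration only (the real templates live in ia64.md), a load
   template along the lines of "ld8%O1 %0 = %1%P1", given a volatile
   POST_INC memory operand, would print as "ld8.acq r14 = [r15], 8":
   %O appends ".acq", the MEM prints as "[r15]", and %P supplies ", 8".  */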
3816 void
3817 ia64_print_operand (file, x, code)
3818 FILE * file;
3819 rtx x;
3820 int code;
3821 {
3822 const char *str;
3823
3824 switch (code)
3825 {
3826 case 0:
3827 /* Handled below. */
3828 break;
3829
3830 case 'C':
3831 {
3832 enum rtx_code c = swap_condition (GET_CODE (x));
3833 fputs (GET_RTX_NAME (c), file);
3834 return;
3835 }
3836
3837 case 'D':
3838 switch (GET_CODE (x))
3839 {
3840 case NE:
3841 str = "neq";
3842 break;
3843 case UNORDERED:
3844 str = "unord";
3845 break;
3846 case ORDERED:
3847 str = "ord";
3848 break;
3849 default:
3850 str = GET_RTX_NAME (GET_CODE (x));
3851 break;
3852 }
3853 fputs (str, file);
3854 return;
3855
3856 case 'E':
3857 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3858 return;
3859
3860 case 'e':
3861 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3862 return;
3863
3864 case 'F':
3865 if (x == CONST0_RTX (GET_MODE (x)))
3866 str = reg_names [FR_REG (0)];
3867 else if (x == CONST1_RTX (GET_MODE (x)))
3868 str = reg_names [FR_REG (1)];
3869 else if (GET_CODE (x) == REG)
3870 str = reg_names [REGNO (x)];
3871 else
3872 abort ();
3873 fputs (str, file);
3874 return;
3875
3876 case 'I':
3877 fputs (reg_names [REGNO (x) + 1], file);
3878 return;
3879
3880 case 'J':
3881 case 'j':
3882 {
3883 unsigned int regno = REGNO (XEXP (x, 0));
3884 if (GET_CODE (x) == EQ)
3885 regno += 1;
3886 if (code == 'j')
3887 regno ^= 1;
3888 fputs (reg_names [regno], file);
3889 }
3890 return;
3891
3892 case 'O':
3893 if (MEM_VOLATILE_P (x))
3894 fputs(".acq", file);
3895 return;
3896
3897 case 'P':
3898 {
3899 HOST_WIDE_INT value;
3900
3901 switch (GET_CODE (XEXP (x, 0)))
3902 {
3903 default:
3904 return;
3905
3906 case POST_MODIFY:
3907 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3908 if (GET_CODE (x) == CONST_INT)
3909 value = INTVAL (x);
3910 else if (GET_CODE (x) == REG)
3911 {
3912 fprintf (file, ", %s", reg_names[REGNO (x)]);
3913 return;
3914 }
3915 else
3916 abort ();
3917 break;
3918
3919 case POST_INC:
3920 value = GET_MODE_SIZE (GET_MODE (x));
3921 break;
3922
3923 case POST_DEC:
3924 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3925 break;
3926 }
3927
3928 putc (',', file);
3929 putc (' ', file);
3930 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3931 return;
3932 }
3933
3934 case 'Q':
3935 if (MEM_VOLATILE_P (x))
3936 fputs(".rel", file);
3937 return;
3938
3939 case 'S':
3940 fprintf (file, "%d", exact_log2 (INTVAL (x)));
3941 return;
3942
3943 case 'T':
3944 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3945 {
3946 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3947 return;
3948 }
3949 break;
3950
3951 case 'U':
3952 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3953 {
3954 const char *prefix = "0x";
3955 if (INTVAL (x) & 0x80000000)
3956 {
3957 fprintf (file, "0xffffffff");
3958 prefix = "";
3959 }
3960 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3961 return;
3962 }
3963 break;
3964
3965 case 'r':
3966 /* If this operand is the constant zero, write it as register zero.
3967 Any register, zero, or CONST_INT value is OK here. */
3968 if (GET_CODE (x) == REG)
3969 fputs (reg_names[REGNO (x)], file);
3970 else if (x == CONST0_RTX (GET_MODE (x)))
3971 fputs ("r0", file);
3972 else if (GET_CODE (x) == CONST_INT)
3973 output_addr_const (file, x);
3974 else
3975 output_operand_lossage ("invalid %%r value");
3976 return;
3977
3978 case '+':
3979 {
3980 const char *which;
3981
3982 /* For conditional branches, returns or calls, substitute
3983 sptk, dptk, dpnt, or spnt for %s. */
3984 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3985 if (x)
3986 {
3987 int pred_val = INTVAL (XEXP (x, 0));
3988
3989 /* Guess top and bottom 10% statically predicted. */
3990 if (pred_val < REG_BR_PROB_BASE / 50)
3991 which = ".spnt";
3992 else if (pred_val < REG_BR_PROB_BASE / 2)
3993 which = ".dpnt";
3994 else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3995 which = ".dptk";
3996 else
3997 which = ".sptk";
3998 }
3999 else if (GET_CODE (current_output_insn) == CALL_INSN)
4000 which = ".sptk";
4001 else
4002 which = ".dptk";
4003
4004 fputs (which, file);
4005 return;
4006 }
4007
4008 case ',':
4009 x = current_insn_predicate;
4010 if (x)
4011 {
4012 unsigned int regno = REGNO (XEXP (x, 0));
4013 if (GET_CODE (x) == EQ)
4014 regno += 1;
4015 fprintf (file, "(%s) ", reg_names [regno]);
4016 }
4017 return;
4018
4019 default:
4020 output_operand_lossage ("ia64_print_operand: unknown code");
4021 return;
4022 }
4023
4024 switch (GET_CODE (x))
4025 {
4026 /* This happens for the spill/restore instructions. */
4027 case POST_INC:
4028 case POST_DEC:
4029 case POST_MODIFY:
4030 x = XEXP (x, 0);
4031 /* ... fall through ... */
4032
4033 case REG:
4034 fputs (reg_names [REGNO (x)], file);
4035 break;
4036
4037 case MEM:
4038 {
4039 rtx addr = XEXP (x, 0);
4040 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
4041 addr = XEXP (addr, 0);
4042 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4043 break;
4044 }
4045
4046 default:
4047 output_addr_const (file, x);
4048 break;
4049 }
4050
4051 return;
4052 }
4053
4054 /* Calculate the cost of moving data from a register in class FROM to
4055 one in class TO, using MODE. */
4056
4057 int
4058 ia64_register_move_cost (mode, from, to)
4059 enum machine_mode mode;
4060 enum reg_class from, to;
4061 {
4062 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
4063 if (to == ADDL_REGS)
4064 to = GR_REGS;
4065 if (from == ADDL_REGS)
4066 from = GR_REGS;
4067
4068 /* All costs are symmetric, so reduce cases by putting the
4069 lower number class as the destination. */
4070 if (from < to)
4071 {
4072 enum reg_class tmp = to;
4073 to = from, from = tmp;
4074 }
4075
4076 /* Moving from FR<->GR in TFmode must be more expensive than 2,
4077 so that we get secondary memory reloads. Between FR_REGS,
4078 we have to make this at least as expensive as MEMORY_MOVE_COST
4079 to avoid spectacularly poor register class preferencing. */
4080 if (mode == TFmode)
4081 {
4082 if (to != GR_REGS || from != GR_REGS)
4083 return MEMORY_MOVE_COST (mode, to, 0);
4084 else
4085 return 3;
4086 }
4087
4088 switch (to)
4089 {
4090 case PR_REGS:
4091 /* Moving between PR registers takes two insns. */
4092 if (from == PR_REGS)
4093 return 3;
4094 /* Moving between PR and anything but GR is impossible. */
4095 if (from != GR_REGS)
4096 return MEMORY_MOVE_COST (mode, to, 0);
4097 break;
4098
4099 case BR_REGS:
4100 /* Moving between BR and anything but GR is impossible. */
4101 if (from != GR_REGS && from != GR_AND_BR_REGS)
4102 return MEMORY_MOVE_COST (mode, to, 0);
4103 break;
4104
4105 case AR_I_REGS:
4106 case AR_M_REGS:
4107 /* Moving between AR and anything but GR is impossible. */
4108 if (from != GR_REGS)
4109 return MEMORY_MOVE_COST (mode, to, 0);
4110 break;
4111
4112 case GR_REGS:
4113 case FR_REGS:
4114 case GR_AND_FR_REGS:
4115 case GR_AND_BR_REGS:
4116 case ALL_REGS:
4117 break;
4118
4119 default:
4120 abort ();
4121 }
4122
4123 return 2;
4124 }
4125
4126 /* This function returns the register class required for a secondary
4127 register when copying between one of the registers in CLASS, and X,
4128 using MODE. A return value of NO_REGS means that no secondary register
4129 is required. */
4130
4131 enum reg_class
4132 ia64_secondary_reload_class (class, mode, x)
4133 enum reg_class class;
4134 enum machine_mode mode ATTRIBUTE_UNUSED;
4135 rtx x;
4136 {
4137 int regno = -1;
4138
4139 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4140 regno = true_regnum (x);
4141
4142 switch (class)
4143 {
4144 case BR_REGS:
4145 case AR_M_REGS:
4146 case AR_I_REGS:
4147 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4148 interaction. We end up with two pseudos with overlapping lifetimes
4149 both of which are equiv to the same constant, and both of which need
4150 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
4151 changes depending on the path length, which means the qty_first_reg
4152 check in make_regs_eqv can give different answers at different times.
4153 At some point I'll probably need a reload_indi pattern to handle
4154 this.
4155
4156 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4157 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
4158 non-general registers for good measure. */
4159 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4160 return GR_REGS;
4161
4162 /* This is needed if a pseudo used as a call_operand gets spilled to a
4163 stack slot. */
4164 if (GET_CODE (x) == MEM)
4165 return GR_REGS;
4166 break;
4167
4168 case FR_REGS:
4169 /* Need to go through general registers to get to other class regs. */
4170 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4171 return GR_REGS;
4172
4173 /* This can happen when a paradoxical subreg is an operand to the
4174 muldi3 pattern. */
4175 /* ??? This shouldn't be necessary after instruction scheduling is
4176 enabled, because paradoxical subregs are not accepted by
4177 register_operand when INSN_SCHEDULING is defined. Or alternatively,
4178 stop the paradoxical subreg stupidity in the *_operand functions
4179 in recog.c. */
4180 if (GET_CODE (x) == MEM
4181 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4182 || GET_MODE (x) == QImode))
4183 return GR_REGS;
4184
4185 /* This can happen because of the ior/and/etc patterns that accept FP
4186 registers as operands. If the third operand is a constant, then it
4187 needs to be reloaded into a FP register. */
4188 if (GET_CODE (x) == CONST_INT)
4189 return GR_REGS;
4190
4191 /* This can happen because of register elimination in a muldi3 insn.
4192 E.g. `26107 * (unsigned long)&u'. */
4193 if (GET_CODE (x) == PLUS)
4194 return GR_REGS;
4195 break;
4196
4197 case PR_REGS:
4198 /* ??? This happens if we cse/gcse a BImode value across a call,
4199 and the function has a nonlocal goto. This is because global
4200 does not allocate call crossing pseudos to hard registers when
4201 current_function_has_nonlocal_goto is true. This is relatively
4202 common for C++ programs that use exceptions. To reproduce,
4203 return NO_REGS and compile libstdc++. */
4204 if (GET_CODE (x) == MEM)
4205 return GR_REGS;
4206
4207 /* This can happen when we take a BImode subreg of a DImode value,
4208 and that DImode value winds up in some non-GR register. */
4209 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4210 return GR_REGS;
4211 break;
4212
4213 case GR_REGS:
4214 /* Since we have no offsettable memory addresses, we need a temporary
4215 to hold the address of the second word. */
4216 if (mode == TImode)
4217 return GR_REGS;
4218 break;
4219
4220 default:
4221 break;
4222 }
4223
4224 return NO_REGS;
4225 }
4226
4227 /* Emit text to declare externally defined variables and functions, because
4228 the Intel assembler does not support undefined externals. */
4229
4230 void
4231 ia64_asm_output_external (file, decl, name)
4232 FILE *file;
4233 tree decl;
4234 const char *name;
4235 {
4236 int save_referenced;
4237
4238 /* GNU as does not need anything here, but the HP linker does need
4239 something for external functions. */
4240
4241 if (TARGET_GNU_AS
4242 && (!TARGET_HPUX_LD
4243 || TREE_CODE (decl) != FUNCTION_DECL
4244 || strstr(name, "__builtin_") == name))
4245 return;
4246
4247 /* ??? The Intel assembler creates a reference that needs to be satisfied by
4248 the linker when we do this, so we need to be careful not to do this for
4249 builtin functions which have no library equivalent. Unfortunately, we
4250 can't tell here whether or not a function will actually be called by
4251 expand_expr, so we pull in library functions even if we may not need
4252 them later. */
4253 if (! strcmp (name, "__builtin_next_arg")
4254 || ! strcmp (name, "alloca")
4255 || ! strcmp (name, "__builtin_constant_p")
4256 || ! strcmp (name, "__builtin_args_info"))
4257 return;
4258
4259 if (TARGET_HPUX_LD)
4260 ia64_hpux_add_extern_decl (name);
4261 else
4262 {
4263 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4264 restore it. */
4265 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4266 if (TREE_CODE (decl) == FUNCTION_DECL)
4267 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4268 (*targetm.asm_out.globalize_label) (file, name);
4269 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4270 }
4271 }
4272
4273 /* Parse the -mfixed-range= option string. */
4274
4275 static void
4276 fix_range (const_str)
4277 const char *const_str;
4278 {
4279 int i, first, last;
4280 char *str, *dash, *comma;
4281
4282 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4283 REG2 are either register names or register numbers. The effect
4284 of this option is to mark the registers in the range from REG1 to
4285 REG2 as ``fixed'' so they won't be used by the compiler. This is
4286 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
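/* For example, "-mfixed-range=f32-f127" marks f32 through f127 as fixed;
   several ranges may be given separated by commas, e.g.
   "-mfixed-range=f32-f127,f8-f15".  */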
4287
4288 i = strlen (const_str);
4289 str = (char *) alloca (i + 1);
4290 memcpy (str, const_str, i + 1);
4291
4292 while (1)
4293 {
4294 dash = strchr (str, '-');
4295 if (!dash)
4296 {
4297 warning ("value of -mfixed-range must have form REG1-REG2");
4298 return;
4299 }
4300 *dash = '\0';
4301
4302 comma = strchr (dash + 1, ',');
4303 if (comma)
4304 *comma = '\0';
4305
4306 first = decode_reg_name (str);
4307 if (first < 0)
4308 {
4309 warning ("unknown register name: %s", str);
4310 return;
4311 }
4312
4313 last = decode_reg_name (dash + 1);
4314 if (last < 0)
4315 {
4316 warning ("unknown register name: %s", dash + 1);
4317 return;
4318 }
4319
4320 *dash = '-';
4321
4322 if (first > last)
4323 {
4324 warning ("%s-%s is an empty range", str, dash + 1);
4325 return;
4326 }
4327
4328 for (i = first; i <= last; ++i)
4329 fixed_regs[i] = call_used_regs[i] = 1;
4330
4331 if (!comma)
4332 break;
4333
4334 *comma = ',';
4335 str = comma + 1;
4336 }
4337 }
4338
4339 static struct machine_function *
4340 ia64_init_machine_status ()
4341 {
4342 return ggc_alloc_cleared (sizeof (struct machine_function));
4343 }
4344
4345 /* Handle TARGET_OPTIONS switches. */
4346
4347 void
4348 ia64_override_options ()
4349 {
4350 if (TARGET_AUTO_PIC)
4351 target_flags |= MASK_CONST_GP;
4352
4353 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4354 {
4355 warning ("cannot optimize floating point division for both latency and throughput");
4356 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4357 }
4358
4359 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4360 {
4361 warning ("cannot optimize integer division for both latency and throughput");
4362 target_flags &= ~MASK_INLINE_INT_DIV_THR;
4363 }
4364
4365 if (ia64_fixed_range_string)
4366 fix_range (ia64_fixed_range_string);
4367
4368 if (ia64_tls_size_string)
4369 {
4370 char *end;
4371 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4372 if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4373 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4374 else
4375 ia64_tls_size = tmp;
4376 }
4377
4378 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4379 flag_schedule_insns_after_reload = 0;
4380
4381 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4382
4383 init_machine_status = ia64_init_machine_status;
4384
4385 /* Tell the compiler which flavor of TFmode we're using. */
4386 if (INTEL_EXTENDED_IEEE_FORMAT)
4387 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
4388 }
4389
4390 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
4391 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
4392 static enum attr_type ia64_safe_type PARAMS((rtx));
4393
4394 static enum attr_itanium_requires_unit0
4395 ia64_safe_itanium_requires_unit0 (insn)
4396 rtx insn;
4397 {
4398 if (recog_memoized (insn) >= 0)
4399 return get_attr_itanium_requires_unit0 (insn);
4400 else
4401 return ITANIUM_REQUIRES_UNIT0_NO;
4402 }
4403
4404 static enum attr_itanium_class
4405 ia64_safe_itanium_class (insn)
4406 rtx insn;
4407 {
4408 if (recog_memoized (insn) >= 0)
4409 return get_attr_itanium_class (insn);
4410 else
4411 return ITANIUM_CLASS_UNKNOWN;
4412 }
4413
4414 static enum attr_type
4415 ia64_safe_type (insn)
4416 rtx insn;
4417 {
4418 if (recog_memoized (insn) >= 0)
4419 return get_attr_type (insn);
4420 else
4421 return TYPE_UNKNOWN;
4422 }
4423
4424 /* The following collection of routines emit instruction group stop bits as
4425 necessary to avoid dependencies. */
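/* In the assembly output a stop bit appears as ";;" between instructions;
   the insn_group_barrier pattern ultimately emits it, and the code below
   only decides where such barriers are required.  */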
4426
4427 /* Need to track some additional registers as far as serialization is
4428 concerned so we can properly handle br.call and br.ret. We could
4429 make these registers visible to gcc, but since these registers are
4430 never explicitly used in gcc generated code, it seems wasteful to
4431 do so (plus it would make the call and return patterns needlessly
4432 complex). */
4433 #define REG_GP (GR_REG (1))
4434 #define REG_RP (BR_REG (0))
4435 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
4436 /* This is used for volatile asms which may require a stop bit immediately
4437 before and after them. */
4438 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
4439 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
4440 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
4441
4442 /* For each register, we keep track of how it has been written in the
4443 current instruction group.
4444
4445 If a register is written unconditionally (no qualifying predicate),
4446 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4447
4448 If a register is written if its qualifying predicate P is true, we
4449 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
4450 may be written again by the complement of P (P^1) and when this happens,
4451 WRITE_COUNT gets set to 2.
4452
4453 The result of this is that whenever an insn attempts to write a register
4454 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4455
4456 If a predicate register is written by a floating-point insn, we set
4457 WRITTEN_BY_FP to true.
4458
4459 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4460 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
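/* As an example of the WRITE_COUNT rules: "(p6) mov r8 = 1" followed in
   the same group by "(p7) mov r8 = 2" needs no barrier when p6/p7 form a
   complementary pair, but a second unconditional write to r8, or a write
   under an unrelated predicate, forces a stop bit first.  */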
4461
4462 struct reg_write_state
4463 {
4464 unsigned int write_count : 2;
4465 unsigned int first_pred : 16;
4466 unsigned int written_by_fp : 1;
4467 unsigned int written_by_and : 1;
4468 unsigned int written_by_or : 1;
4469 };
4470
4471 /* Cumulative info for the current instruction group. */
4472 struct reg_write_state rws_sum[NUM_REGS];
4473 /* Info for the current instruction. This gets copied to rws_sum after a
4474 stop bit is emitted. */
4475 struct reg_write_state rws_insn[NUM_REGS];
4476
4477 /* Indicates whether this is the first instruction after a stop bit,
4478 in which case we don't need another stop bit. Without this, we hit
4479 the abort in ia64_variable_issue when scheduling an alloc. */
4480 static int first_instruction;
4481
4482 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4483 RTL for one instruction. */
4484 struct reg_flags
4485 {
4486 unsigned int is_write : 1; /* Is register being written? */
4487 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
4488 unsigned int is_branch : 1; /* Is register used as part of a branch? */
4489 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
4490 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
4491 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
4492 };
4493
4494 static void rws_update PARAMS ((struct reg_write_state *, int,
4495 struct reg_flags, int));
4496 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4497 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4498 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4499 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4500 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4501 static void init_insn_group_barriers PARAMS ((void));
4502 static int group_barrier_needed_p PARAMS ((rtx));
4503 static int safe_group_barrier_needed_p PARAMS ((rtx));
4504
4505 /* Update *RWS for REGNO, which is being written by the current instruction,
4506 with predicate PRED, and associated register flags in FLAGS. */
4507
4508 static void
4509 rws_update (rws, regno, flags, pred)
4510 struct reg_write_state *rws;
4511 int regno;
4512 struct reg_flags flags;
4513 int pred;
4514 {
4515 if (pred)
4516 rws[regno].write_count++;
4517 else
4518 rws[regno].write_count = 2;
4519 rws[regno].written_by_fp |= flags.is_fp;
4520 /* ??? Not tracking and/or across differing predicates. */
4521 rws[regno].written_by_and = flags.is_and;
4522 rws[regno].written_by_or = flags.is_or;
4523 rws[regno].first_pred = pred;
4524 }
4525
4526 /* Handle an access to register REGNO of type FLAGS using predicate register
4527 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates
4528 a dependency with an earlier instruction in the same group. */
4529
4530 static int
4531 rws_access_regno (regno, flags, pred)
4532 int regno;
4533 struct reg_flags flags;
4534 int pred;
4535 {
4536 int need_barrier = 0;
4537
4538 if (regno >= NUM_REGS)
4539 abort ();
4540
4541 if (! PR_REGNO_P (regno))
4542 flags.is_and = flags.is_or = 0;
4543
4544 if (flags.is_write)
4545 {
4546 int write_count;
4547
4548 /* One insn writes same reg multiple times? */
4549 if (rws_insn[regno].write_count > 0)
4550 abort ();
4551
4552 /* Update info for current instruction. */
4553 rws_update (rws_insn, regno, flags, pred);
4554 write_count = rws_sum[regno].write_count;
4555
4556 switch (write_count)
4557 {
4558 case 0:
4559 /* The register has not been written yet. */
4560 rws_update (rws_sum, regno, flags, pred);
4561 break;
4562
4563 case 1:
4564 /* The register has been written via a predicate. If this is
4565 not a complementary predicate, then we need a barrier. */
4566 /* ??? This assumes that P and P+1 are always complementary
4567 predicates for P even. */
4568 if (flags.is_and && rws_sum[regno].written_by_and)
4569 ;
4570 else if (flags.is_or && rws_sum[regno].written_by_or)
4571 ;
4572 else if ((rws_sum[regno].first_pred ^ 1) != pred)
4573 need_barrier = 1;
4574 rws_update (rws_sum, regno, flags, pred);
4575 break;
4576
4577 case 2:
4578 /* The register has been unconditionally written already. We
4579 need a barrier. */
4580 if (flags.is_and && rws_sum[regno].written_by_and)
4581 ;
4582 else if (flags.is_or && rws_sum[regno].written_by_or)
4583 ;
4584 else
4585 need_barrier = 1;
4586 rws_sum[regno].written_by_and = flags.is_and;
4587 rws_sum[regno].written_by_or = flags.is_or;
4588 break;
4589
4590 default:
4591 abort ();
4592 }
4593 }
4594 else
4595 {
4596 if (flags.is_branch)
4597 {
4598 /* Branches have several RAW exceptions that allow us to avoid
4599 barriers. */
4600
4601 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4602 /* RAW dependencies on branch regs are permissible as long
4603 as the writer is a non-branch instruction. Since we
4604 never generate code that uses a branch register written
4605 by a branch instruction, handling this case is
4606 easy. */
4607 return 0;
4608
4609 if (REGNO_REG_CLASS (regno) == PR_REGS
4610 && ! rws_sum[regno].written_by_fp)
4611 /* The predicates of a branch are available within the
4612 same insn group as long as the predicate was written by
4613 something other than a floating-point instruction. */
4614 return 0;
4615 }
4616
4617 if (flags.is_and && rws_sum[regno].written_by_and)
4618 return 0;
4619 if (flags.is_or && rws_sum[regno].written_by_or)
4620 return 0;
4621
4622 switch (rws_sum[regno].write_count)
4623 {
4624 case 0:
4625 /* The register has not been written yet. */
4626 break;
4627
4628 case 1:
4629 /* The register has been written via a predicate. If this is
4630 not a complementary predicate, then we need a barrier. */
4631 /* ??? This assumes that P and P+1 are always complementary
4632 predicates for P even. */
4633 if ((rws_sum[regno].first_pred ^ 1) != pred)
4634 need_barrier = 1;
4635 break;
4636
4637 case 2:
4638 /* The register has been unconditionally written already. We
4639 need a barrier. */
4640 need_barrier = 1;
4641 break;
4642
4643 default:
4644 abort ();
4645 }
4646 }
4647
4648 return need_barrier;
4649 }
4650
4651 static int
4652 rws_access_reg (reg, flags, pred)
4653 rtx reg;
4654 struct reg_flags flags;
4655 int pred;
4656 {
4657 int regno = REGNO (reg);
4658 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4659
4660 if (n == 1)
4661 return rws_access_regno (regno, flags, pred);
4662 else
4663 {
4664 int need_barrier = 0;
4665 while (--n >= 0)
4666 need_barrier |= rws_access_regno (regno + n, flags, pred);
4667 return need_barrier;
4668 }
4669 }
4670
4671 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
4672 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
4673
4674 static void
4675 update_set_flags (x, pflags, ppred, pcond)
4676 rtx x;
4677 struct reg_flags *pflags;
4678 int *ppred;
4679 rtx *pcond;
4680 {
4681 rtx src = SET_SRC (x);
4682
4683 *pcond = 0;
4684
4685 switch (GET_CODE (src))
4686 {
4687 case CALL:
4688 return;
4689
4690 case IF_THEN_ELSE:
4691 if (SET_DEST (x) == pc_rtx)
4692 /* X is a conditional branch. */
4693 return;
4694 else
4695 {
4696 int is_complemented = 0;
4697
4698 /* X is a conditional move. */
4699 rtx cond = XEXP (src, 0);
4700 if (GET_CODE (cond) == EQ)
4701 is_complemented = 1;
4702 cond = XEXP (cond, 0);
4703 if (GET_CODE (cond) != REG
4704 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4705 abort ();
4706 *pcond = cond;
4707 if (XEXP (src, 1) == SET_DEST (x)
4708 || XEXP (src, 2) == SET_DEST (x))
4709 {
4710 /* X is a conditional move that conditionally writes the
4711 destination. */
4712
4713 /* We need another complement in this case. */
4714 if (XEXP (src, 1) == SET_DEST (x))
4715 is_complemented = ! is_complemented;
4716
4717 *ppred = REGNO (cond);
4718 if (is_complemented)
4719 ++*ppred;
4720 }
4721
4722 /* ??? If this is a conditional write to the dest, then this
4723 instruction does not actually read one source. This probably
4724 doesn't matter, because that source is also the dest. */
4725 /* ??? Multiple writes to predicate registers are allowed
4726 if they are all AND type compares, or if they are all OR
4727 type compares. We do not generate such instructions
4728 currently. */
4729 }
4730 /* ... fall through ... */
4731
4732 default:
4733 if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4734 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4735 /* Set pflags->is_fp to 1 so that we know we're dealing
4736 with a floating point comparison when processing the
4737 destination of the SET. */
4738 pflags->is_fp = 1;
4739
4740 /* Discover if this is a parallel comparison. We only handle
4741 and.orcm and or.andcm at present, since we must retain a
4742 strict inverse on the predicate pair. */
4743 else if (GET_CODE (src) == AND)
4744 pflags->is_and = 1;
4745 else if (GET_CODE (src) == IOR)
4746 pflags->is_or = 1;
4747
4748 break;
4749 }
4750 }
4751
4752 /* Subroutine of rtx_needs_barrier; this function determines whether the
4753 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
4754 are as in rtx_needs_barrier. COND is an rtx that holds the condition
4755 for this insn. */
4756
4757 static int
4758 set_src_needs_barrier (x, flags, pred, cond)
4759 rtx x;
4760 struct reg_flags flags;
4761 int pred;
4762 rtx cond;
4763 {
4764 int need_barrier = 0;
4765 rtx dst;
4766 rtx src = SET_SRC (x);
4767
4768 if (GET_CODE (src) == CALL)
4769 /* We don't need to worry about the result registers that
4770 get written by subroutine call. */
4771 return rtx_needs_barrier (src, flags, pred);
4772 else if (SET_DEST (x) == pc_rtx)
4773 {
4774 /* X is a conditional branch. */
4775 /* ??? This seems redundant, as the caller sets this bit for
4776 all JUMP_INSNs. */
4777 flags.is_branch = 1;
4778 return rtx_needs_barrier (src, flags, pred);
4779 }
4780
4781 need_barrier = rtx_needs_barrier (src, flags, pred);
4782
4783 /* This instruction unconditionally uses a predicate register. */
4784 if (cond)
4785 need_barrier |= rws_access_reg (cond, flags, 0);
4786
4787 dst = SET_DEST (x);
4788 if (GET_CODE (dst) == ZERO_EXTRACT)
4789 {
4790 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4791 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4792 dst = XEXP (dst, 0);
4793 }
4794 return need_barrier;
4795 }
4796
4797 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
4798 Return 1 if this access creates a dependency with an earlier instruction
4799 in the same group. */
4800
4801 static int
4802 rtx_needs_barrier (x, flags, pred)
4803 rtx x;
4804 struct reg_flags flags;
4805 int pred;
4806 {
4807 int i, j;
4808 int is_complemented = 0;
4809 int need_barrier = 0;
4810 const char *format_ptr;
4811 struct reg_flags new_flags;
4812 rtx cond = 0;
4813
4814 if (! x)
4815 return 0;
4816
4817 new_flags = flags;
4818
4819 switch (GET_CODE (x))
4820 {
4821 case SET:
4822 update_set_flags (x, &new_flags, &pred, &cond);
4823 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4824 if (GET_CODE (SET_SRC (x)) != CALL)
4825 {
4826 new_flags.is_write = 1;
4827 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4828 }
4829 break;
4830
4831 case CALL:
4832 new_flags.is_write = 0;
4833 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4834
4835 /* Avoid multiple register writes, in case this is a pattern with
4836 multiple CALL rtx. This avoids an abort in rws_access_reg. */
4837 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4838 {
4839 new_flags.is_write = 1;
4840 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4841 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4842 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4843 }
4844 break;
4845
4846 case COND_EXEC:
4847 /* X is a predicated instruction. */
4848
4849 cond = COND_EXEC_TEST (x);
4850 if (pred)
4851 abort ();
4852 need_barrier = rtx_needs_barrier (cond, flags, 0);
4853
4854 if (GET_CODE (cond) == EQ)
4855 is_complemented = 1;
4856 cond = XEXP (cond, 0);
4857 if (GET_CODE (cond) != REG
4858 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4859 abort ();
4860 pred = REGNO (cond);
4861 if (is_complemented)
4862 ++pred;
4863
4864 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4865 return need_barrier;
4866
4867 case CLOBBER:
4868 case USE:
4869 /* Clobber & use are for earlier compiler-phases only. */
4870 break;
4871
4872 case ASM_OPERANDS:
4873 case ASM_INPUT:
4874 /* We always emit stop bits for traditional asms. We emit stop bits
4875 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
4876 if (GET_CODE (x) != ASM_OPERANDS
4877 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4878 {
4879 /* Avoid writing the register multiple times if we have multiple
4880 asm outputs. This avoids an abort in rws_access_reg. */
4881 if (! rws_insn[REG_VOLATILE].write_count)
4882 {
4883 new_flags.is_write = 1;
4884 rws_access_regno (REG_VOLATILE, new_flags, pred);
4885 }
4886 return 1;
4887 }
4888
4889 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4890 We cannot just fall through here since then we would be confused
4891 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
4892 a traditional asm, unlike its normal usage. */
4893
4894 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4895 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4896 need_barrier = 1;
4897 break;
4898
4899 case PARALLEL:
4900 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4901 {
4902 rtx pat = XVECEXP (x, 0, i);
4903 if (GET_CODE (pat) == SET)
4904 {
4905 update_set_flags (pat, &new_flags, &pred, &cond);
4906 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4907 }
4908 else if (GET_CODE (pat) == USE
4909 || GET_CODE (pat) == CALL
4910 || GET_CODE (pat) == ASM_OPERANDS)
4911 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4912 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4913 abort ();
4914 }
4915 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4916 {
4917 rtx pat = XVECEXP (x, 0, i);
4918 if (GET_CODE (pat) == SET)
4919 {
4920 if (GET_CODE (SET_SRC (pat)) != CALL)
4921 {
4922 new_flags.is_write = 1;
4923 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4924 pred);
4925 }
4926 }
4927 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4928 need_barrier |= rtx_needs_barrier (pat, flags, pred);
4929 }
4930 break;
4931
4932 case SUBREG:
4933 x = SUBREG_REG (x);
4934 /* FALLTHRU */
4935 case REG:
4936 if (REGNO (x) == AR_UNAT_REGNUM)
4937 {
4938 for (i = 0; i < 64; ++i)
4939 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4940 }
4941 else
4942 need_barrier = rws_access_reg (x, flags, pred);
4943 break;
4944
4945 case MEM:
4946 /* Find the regs used in memory address computation. */
4947 new_flags.is_write = 0;
4948 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4949 break;
4950
4951 case CONST_INT: case CONST_DOUBLE:
4952 case SYMBOL_REF: case LABEL_REF: case CONST:
4953 break;
4954
4955 /* Operators with side-effects. */
4956 case POST_INC: case POST_DEC:
4957 if (GET_CODE (XEXP (x, 0)) != REG)
4958 abort ();
4959
4960 new_flags.is_write = 0;
4961 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4962 new_flags.is_write = 1;
4963 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4964 break;
4965
4966 case POST_MODIFY:
4967 if (GET_CODE (XEXP (x, 0)) != REG)
4968 abort ();
4969
4970 new_flags.is_write = 0;
4971 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
4972 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4973 new_flags.is_write = 1;
4974 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4975 break;
4976
4977 /* Handle common unary and binary ops for efficiency. */
4978 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
4979 case MOD: case UDIV: case UMOD: case AND: case IOR:
4980 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
4981 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
4982 case NE: case EQ: case GE: case GT: case LE:
4983 case LT: case GEU: case GTU: case LEU: case LTU:
4984 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4985 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4986 break;
4987
4988 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
4989 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
4990 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
4991 case SQRT: case FFS:
4992 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4993 break;
4994
4995 case UNSPEC:
4996 switch (XINT (x, 1))
4997 {
4998 case UNSPEC_LTOFF_DTPMOD:
4999 case UNSPEC_LTOFF_DTPREL:
5000 case UNSPEC_DTPREL:
5001 case UNSPEC_LTOFF_TPREL:
5002 case UNSPEC_TPREL:
5003 case UNSPEC_PRED_REL_MUTEX:
5004 case UNSPEC_PIC_CALL:
5005 case UNSPEC_MF:
5006 case UNSPEC_FETCHADD_ACQ:
5007 case UNSPEC_BSP_VALUE:
5008 case UNSPEC_FLUSHRS:
5009 case UNSPEC_BUNDLE_SELECTOR:
5010 break;
5011
5012 case UNSPEC_GR_SPILL:
5013 case UNSPEC_GR_RESTORE:
5014 {
5015 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5016 HOST_WIDE_INT bit = (offset >> 3) & 63;
5017
5018 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5019 new_flags.is_write = (XINT (x, 1) == 1);
5020 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5021 new_flags, pred);
5022 break;
5023 }
5024
5025 case UNSPEC_FR_SPILL:
5026 case UNSPEC_FR_RESTORE:
5027 case UNSPEC_POPCNT:
5028 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5029 break;
5030
5031 case UNSPEC_ADDP4:
5032 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5033 break;
5034
5035 case UNSPEC_FR_RECIP_APPROX:
5036 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5037 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5038 break;
5039
5040 case UNSPEC_CMPXCHG_ACQ:
5041 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5042 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5043 break;
5044
5045 default:
5046 abort ();
5047 }
5048 break;
5049
5050 case UNSPEC_VOLATILE:
5051 switch (XINT (x, 1))
5052 {
5053 case UNSPECV_ALLOC:
5054 /* Alloc must always be the first instruction of a group.
5055 We force this by always returning true. */
5056 /* ??? We might get better scheduling if we explicitly check for
5057 input/local/output register dependencies, and modify the
5058 scheduler so that alloc is always reordered to the start of
5059 the current group. We could then eliminate all of the
5060 first_instruction code. */
5061 rws_access_regno (AR_PFS_REGNUM, flags, pred);
5062
5063 new_flags.is_write = 1;
5064 rws_access_regno (REG_AR_CFM, new_flags, pred);
5065 return 1;
5066
5067 case UNSPECV_SET_BSP:
5068 need_barrier = 1;
5069 break;
5070
5071 case UNSPECV_BLOCKAGE:
5072 case UNSPECV_INSN_GROUP_BARRIER:
5073 case UNSPECV_BREAK:
5074 case UNSPECV_PSAC_ALL:
5075 case UNSPECV_PSAC_NORMAL:
5076 return 0;
5077
5078 default:
5079 abort ();
5080 }
5081 break;
5082
5083 case RETURN:
5084 new_flags.is_write = 0;
5085 need_barrier = rws_access_regno (REG_RP, flags, pred);
5086 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5087
5088 new_flags.is_write = 1;
5089 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5090 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5091 break;
5092
5093 default:
5094 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5095 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5096 switch (format_ptr[i])
5097 {
5098 case '0': /* unused field */
5099 case 'i': /* integer */
5100 case 'n': /* note */
5101 case 'w': /* wide integer */
5102 case 's': /* pointer to string */
5103 case 'S': /* optional pointer to string */
5104 break;
5105
5106 case 'e':
5107 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5108 need_barrier = 1;
5109 break;
5110
5111 case 'E':
5112 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5113 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5114 need_barrier = 1;
5115 break;
5116
5117 default:
5118 abort ();
5119 }
5120 break;
5121 }
5122 return need_barrier;
5123 }
5124
5125 /* Clear out the state for group_barrier_needed_p at the start of a
5126 sequence of insns. */
5127
5128 static void
5129 init_insn_group_barriers ()
5130 {
5131 memset (rws_sum, 0, sizeof (rws_sum));
5132 first_instruction = 1;
5133 }
5134
5135 /* Given the current state, recorded by previous calls to this function,
5136 determine whether a group barrier (a stop bit) is necessary before INSN.
5137 Return nonzero if so. */
5138
5139 static int
5140 group_barrier_needed_p (insn)
5141 rtx insn;
5142 {
5143 rtx pat;
5144 int need_barrier = 0;
5145 struct reg_flags flags;
5146
5147 memset (&flags, 0, sizeof (flags));
5148 switch (GET_CODE (insn))
5149 {
5150 case NOTE:
5151 break;
5152
5153 case BARRIER:
5154 /* A barrier doesn't imply an instruction group boundary. */
5155 break;
5156
5157 case CODE_LABEL:
5158 memset (rws_insn, 0, sizeof (rws_insn));
5159 return 1;
5160
5161 case CALL_INSN:
5162 flags.is_branch = 1;
5163 flags.is_sibcall = SIBLING_CALL_P (insn);
5164 memset (rws_insn, 0, sizeof (rws_insn));
5165
5166 /* Don't bundle a call following another call. */
5167 if ((pat = prev_active_insn (insn))
5168 && GET_CODE (pat) == CALL_INSN)
5169 {
5170 need_barrier = 1;
5171 break;
5172 }
5173
5174 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5175 break;
5176
5177 case JUMP_INSN:
5178 flags.is_branch = 1;
5179
5180 /* Don't bundle a jump following a call. */
5181 if ((pat = prev_active_insn (insn))
5182 && GET_CODE (pat) == CALL_INSN)
5183 {
5184 need_barrier = 1;
5185 break;
5186 }
5187 /* FALLTHRU */
5188
5189 case INSN:
5190 if (GET_CODE (PATTERN (insn)) == USE
5191 || GET_CODE (PATTERN (insn)) == CLOBBER)
5192 /* Don't care about USE and CLOBBER "insns"---those are used to
5193 indicate to the optimizer that it shouldn't get rid of
5194 certain operations. */
5195 break;
5196
5197 pat = PATTERN (insn);
5198
5199 /* Ug. Hack hacks hacked elsewhere. */
5200 switch (recog_memoized (insn))
5201 {
5202 /* We play dependency tricks with the epilogue in order
5203 to get proper schedules. Undo this for dv analysis. */
5204 case CODE_FOR_epilogue_deallocate_stack:
5205 case CODE_FOR_prologue_allocate_stack:
5206 pat = XVECEXP (pat, 0, 0);
5207 break;
5208
5209 /* The pattern we use for br.cloop confuses the code above.
5210 The second element of the vector is representative. */
5211 case CODE_FOR_doloop_end_internal:
5212 pat = XVECEXP (pat, 0, 1);
5213 break;
5214
5215 /* Doesn't generate code. */
5216 case CODE_FOR_pred_rel_mutex:
5217 case CODE_FOR_prologue_use:
5218 return 0;
5219
5220 default:
5221 break;
5222 }
5223
5224 memset (rws_insn, 0, sizeof (rws_insn));
5225 need_barrier = rtx_needs_barrier (pat, flags, 0);
5226
5227 /* Check to see if the previous instruction was a volatile
5228 asm. */
5229 if (! need_barrier)
5230 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5231 break;
5232
5233 default:
5234 abort ();
5235 }
5236
5237 if (first_instruction)
5238 {
5239 need_barrier = 0;
5240 first_instruction = 0;
5241 }
5242
5243 return need_barrier;
5244 }
5245
5246 /* Like group_barrier_needed_p, but do not clobber the current state. */
5247
5248 static int
5249 safe_group_barrier_needed_p (insn)
5250 rtx insn;
5251 {
5252 struct reg_write_state rws_saved[NUM_REGS];
5253 int saved_first_instruction;
5254 int t;
5255
5256 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5257 saved_first_instruction = first_instruction;
5258
5259 t = group_barrier_needed_p (insn);
5260
5261 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5262 first_instruction = saved_first_instruction;
5263
5264 return t;
5265 }
5266
5267 /* INSNS is a chain of instructions. Scan the chain, and insert stop bits
5268 as necessary to eliminate dependencies. This function assumes that
5269 a final instruction scheduling pass has been run which has already
5270 inserted most of the necessary stop bits. This function only inserts
5271 new ones at basic block boundaries, since these are invisible to the
5272 scheduler. */
5273
5274 static void
5275 emit_insn_group_barriers (dump, insns)
5276 FILE *dump;
5277 rtx insns;
5278 {
5279 rtx insn;
5280 rtx last_label = 0;
5281 int insns_since_last_label = 0;
5282
5283 init_insn_group_barriers ();
5284
5285 for (insn = insns; insn; insn = NEXT_INSN (insn))
5286 {
5287 if (GET_CODE (insn) == CODE_LABEL)
5288 {
5289 if (insns_since_last_label)
5290 last_label = insn;
5291 insns_since_last_label = 0;
5292 }
5293 else if (GET_CODE (insn) == NOTE
5294 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5295 {
5296 if (insns_since_last_label)
5297 last_label = insn;
5298 insns_since_last_label = 0;
5299 }
5300 else if (GET_CODE (insn) == INSN
5301 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5302 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5303 {
5304 init_insn_group_barriers ();
5305 last_label = 0;
5306 }
5307 else if (INSN_P (insn))
5308 {
5309 insns_since_last_label = 1;
5310
5311 if (group_barrier_needed_p (insn))
5312 {
5313 if (last_label)
5314 {
5315 if (dump)
5316 fprintf (dump, "Emitting stop before label %d\n",
5317 INSN_UID (last_label));
5318 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5319 insn = last_label;
5320
5321 init_insn_group_barriers ();
5322 last_label = 0;
5323 }
5324 }
5325 }
5326 }
5327 }
5328
5329 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5330 This function has to emit all necessary group barriers. */
5331
5332 static void
5333 emit_all_insn_group_barriers (dump, insns)
5334 FILE *dump ATTRIBUTE_UNUSED;
5335 rtx insns;
5336 {
5337 rtx insn;
5338
5339 init_insn_group_barriers ();
5340
5341 for (insn = insns; insn; insn = NEXT_INSN (insn))
5342 {
5343 if (GET_CODE (insn) == BARRIER)
5344 {
5345 rtx last = prev_active_insn (insn);
5346
5347 if (! last)
5348 continue;
5349 if (GET_CODE (last) == JUMP_INSN
5350 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5351 last = prev_active_insn (last);
5352 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5353 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5354
5355 init_insn_group_barriers ();
5356 }
5357 else if (INSN_P (insn))
5358 {
5359 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5360 init_insn_group_barriers ();
5361 else if (group_barrier_needed_p (insn))
5362 {
5363 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5364 init_insn_group_barriers ();
5365 group_barrier_needed_p (insn);
5366 }
5367 }
5368 }
5369 }
5370
5371 static int errata_find_address_regs PARAMS ((rtx *, void *));
5372 static void errata_emit_nops PARAMS ((rtx));
5373 static void fixup_errata PARAMS ((void));
5374
5375 /* This structure is used to track some details about the previous insn
5376 groups so we can determine if it may be necessary to insert NOPs to
5377 workaround hardware errata. */
5378 static struct group
5379 {
5380 HARD_REG_SET p_reg_set;
5381 HARD_REG_SET gr_reg_conditionally_set;
5382 } last_group[2];
5383
5384 /* Index into the last_group array. */
5385 static int group_idx;
5386
5387 /* Called through for_each_rtx; determines if a hard register that was
5388 conditionally set in the previous group is used as an address register.
5389 It ensures that for_each_rtx returns 1 in that case. */
5390 static int
5391 errata_find_address_regs (xp, data)
5392 rtx *xp;
5393 void *data ATTRIBUTE_UNUSED;
5394 {
5395 rtx x = *xp;
5396 if (GET_CODE (x) != MEM)
5397 return 0;
5398 x = XEXP (x, 0);
5399 if (GET_CODE (x) == POST_MODIFY)
5400 x = XEXP (x, 0);
5401 if (GET_CODE (x) == REG)
5402 {
5403 struct group *prev_group = last_group + (group_idx ^ 1);
5404 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5405 REGNO (x)))
5406 return 1;
5407 return -1;
5408 }
5409 return 0;
5410 }
5411
5412 /* Called for each insn; this function keeps track of the state in
5413 last_group and emits additional NOPs if necessary to work around
5414 an Itanium A/B step erratum. */
5415 static void
5416 errata_emit_nops (insn)
5417 rtx insn;
5418 {
5419 struct group *this_group = last_group + group_idx;
5420 struct group *prev_group = last_group + (group_idx ^ 1);
5421 rtx pat = PATTERN (insn);
5422 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5423 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5424 enum attr_type type;
5425 rtx set = real_pat;
5426
5427 if (GET_CODE (real_pat) == USE
5428 || GET_CODE (real_pat) == CLOBBER
5429 || GET_CODE (real_pat) == ASM_INPUT
5430 || GET_CODE (real_pat) == ADDR_VEC
5431 || GET_CODE (real_pat) == ADDR_DIFF_VEC
5432 || asm_noperands (PATTERN (insn)) >= 0)
5433 return;
5434
5435 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5436 parts of it. */
5437
5438 if (GET_CODE (set) == PARALLEL)
5439 {
5440 int i;
5441 set = XVECEXP (real_pat, 0, 0);
5442 for (i = 1; i < XVECLEN (real_pat, 0); i++)
5443 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5444 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5445 {
5446 set = 0;
5447 break;
5448 }
5449 }
5450
5451 if (set && GET_CODE (set) != SET)
5452 set = 0;
5453
5454 type = get_attr_type (insn);
5455
5456 if (type == TYPE_F
5457 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5458 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5459
5460 if ((type == TYPE_M || type == TYPE_A) && cond && set
5461 && REG_P (SET_DEST (set))
5462 && GET_CODE (SET_SRC (set)) != PLUS
5463 && GET_CODE (SET_SRC (set)) != MINUS
5464 && (GET_CODE (SET_SRC (set)) != ASHIFT
5465 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5466 && (GET_CODE (SET_SRC (set)) != MEM
5467 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5468 && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5469 {
5470 if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5471 || ! REG_P (XEXP (cond, 0)))
5472 abort ();
5473
5474 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5475 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5476 }
5477 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5478 {
5479 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5480 emit_insn_before (gen_nop (), insn);
5481 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5482 group_idx = 0;
5483 memset (last_group, 0, sizeof last_group);
5484 }
5485 }
5486
5487 /* Emit extra nops if they are required to work around hardware errata. */
5488
5489 static void
5490 fixup_errata ()
5491 {
5492 rtx insn;
5493
5494 if (! TARGET_B_STEP)
5495 return;
5496
5497 group_idx = 0;
5498 memset (last_group, 0, sizeof last_group);
5499
5500 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5501 {
5502 if (!INSN_P (insn))
5503 continue;
5504
5505 if (ia64_safe_type (insn) == TYPE_S)
5506 {
5507 group_idx ^= 1;
5508 memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5509 }
5510 else
5511 errata_emit_nops (insn);
5512 }
5513 }
5514
5515 /* Instruction scheduling support. */
5516 /* Describe one bundle. */
5517
5518 struct bundle
5519 {
5520 /* Zero if there's no possibility of a stop in this bundle other than
5521 at the end, otherwise the position of the optional stop bit. */
5522 int possible_stop;
5523 /* The types of the three slots. */
5524 enum attr_type t[3];
5525 /* The pseudo op to be emitted into the assembler output. */
5526 const char *name;
5527 };
5528
5529 #define NR_BUNDLES 10
5530
5531 /* A list of all available bundles. */
5532
5533 static const struct bundle bundle[NR_BUNDLES] =
5534 {
5535 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5536 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5537 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5538 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5539 #if NR_BUNDLES == 10
5540 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5541 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5542 #endif
5543 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5544 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5545 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5546 /* .mfi needs to occur earlier than .mlx, so that we only generate .mlx if
5547 it matches an L type insn. Otherwise we'll try to generate L type
5548 nops. */
5549 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5550 };
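/* A note on the POSSIBLE_STOP values above: the value is the number of the
   slot that would start a new insn group if the optional stop bit is used.
   For example, .mii (possible_stop == 2) corresponds to the MI;;I template
   form, and .mmi (possible_stop == 1) to M;;MI.  (Illustrative reading of
   the table entries; see also how packet_matches_p tests this field.)  */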
5551
5552 /* Describe a packet of instructions. Packets consist of two bundles that
5553 are visible to the hardware in one scheduling window. */
5554
5555 struct ia64_packet
5556 {
5557 const struct bundle *t1, *t2;
5558 /* Precomputed value of the first split issue in this packet if a cycle
5559 starts at its beginning. */
5560 int first_split;
5561 /* For convenience, the insn types are replicated here so we don't have
5562 to go through T1 and T2 all the time. */
5563 enum attr_type t[6];
5564 };
5565
5566 /* An array containing all possible packets. */
5567 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5568 static struct ia64_packet packets[NR_PACKETS];
5569
5570 /* Map attr_type to a string with the name. */
5571
5572 static const char *const type_names[] =
5573 {
5574 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5575 };
5576
5577 /* Nonzero if we should insert stop bits into the schedule. */
5578 int ia64_final_schedule = 0;
5579
5580 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5581 static rtx ia64_single_set PARAMS ((rtx));
5582 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5583 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5584 static void maybe_rotate PARAMS ((FILE *));
5585 static void finish_last_head PARAMS ((FILE *, int));
5586 static void rotate_one_bundle PARAMS ((FILE *));
5587 static void rotate_two_bundles PARAMS ((FILE *));
5588 static void nop_cycles_until PARAMS ((int, FILE *));
5589 static void cycle_end_fill_slots PARAMS ((FILE *));
5590 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5591 static int get_split PARAMS ((const struct ia64_packet *, int));
5592 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5593 const struct ia64_packet *, int));
5594 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5595 rtx *, enum attr_type *, int));
5596 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5597 static void dump_current_packet PARAMS ((FILE *));
5598 static void schedule_stop PARAMS ((FILE *));
5599 static rtx gen_nop_type PARAMS ((enum attr_type));
5600 static void ia64_emit_nops PARAMS ((void));
5601
5602 /* Map a bundle number to its pseudo-op. */
5603
5604 const char *
5605 get_bundle_name (b)
5606 int b;
5607 {
5608 return bundle[b].name;
5609 }
5610
5611 /* Compute the slot which will cause a split issue in packet P if the
5612 current cycle begins at slot BEGIN. */
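/* For illustration: for an .mii/.mii packet with BEGIN == 0, the loop below
   counts M insns at slots 0 and 3 and I insns at slots 1, 2 and 4; since at
   most two I insns can issue per cycle, the split is reported at slot 4.  */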
5613
5614 static int
5615 itanium_split_issue (p, begin)
5616 const struct ia64_packet *p;
5617 int begin;
5618 {
5619 int type_count[TYPE_S];
5620 int i;
5621 int split = 6;
5622
5623 if (begin < 3)
5624 {
5625 /* Always split before and after MMF. */
5626 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5627 return 3;
5628 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5629 return 3;
5630 /* Always split after MBB and BBB. */
5631 if (p->t[1] == TYPE_B)
5632 return 3;
5633 /* Split after first bundle in MIB BBB combination. */
5634 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5635 return 3;
5636 }
5637
5638 memset (type_count, 0, sizeof type_count);
5639 for (i = begin; i < split; i++)
5640 {
5641 enum attr_type t0 = p->t[i];
5642 /* An MLX bundle reserves the same units as an MFI bundle. */
5643 enum attr_type t = (t0 == TYPE_L ? TYPE_F
5644 : t0 == TYPE_X ? TYPE_I
5645 : t0);
5646
5647 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5648 2 integer per cycle. */
5649 int max = (t == TYPE_B ? 3 : 2);
5650 if (type_count[t] == max)
5651 return i;
5652
5653 type_count[t]++;
5654 }
5655 return split;
5656 }
5657
5658 /* Return the maximum number of instructions a cpu can issue. */
5659
5660 static int
5661 ia64_issue_rate ()
5662 {
5663 return 6;
5664 }
5665
5666 /* Helper function - like single_set, but look inside COND_EXEC. */
5667
5668 static rtx
5669 ia64_single_set (insn)
5670 rtx insn;
5671 {
5672 rtx x = PATTERN (insn), ret;
5673 if (GET_CODE (x) == COND_EXEC)
5674 x = COND_EXEC_CODE (x);
5675 if (GET_CODE (x) == SET)
5676 return x;
5677
5678 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
5679 Although they are not classical single sets, the second set is there just
5680 to keep the insn from being moved past FP-relative stack accesses. */
5681 switch (recog_memoized (insn))
5682 {
5683 case CODE_FOR_prologue_allocate_stack:
5684 case CODE_FOR_epilogue_deallocate_stack:
5685 ret = XVECEXP (x, 0, 0);
5686 break;
5687
5688 default:
5689 ret = single_set_2 (insn, x);
5690 break;
5691 }
5692
5693 return ret;
5694 }
5695
5696 /* Adjust the cost of a scheduling dependency. Return the new cost of
5697 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
5698
5699 static int
5700 ia64_adjust_cost (insn, link, dep_insn, cost)
5701 rtx insn, link, dep_insn;
5702 int cost;
5703 {
5704 enum attr_type dep_type;
5705 enum attr_itanium_class dep_class;
5706 enum attr_itanium_class insn_class;
5707 rtx dep_set, set, src, addr;
5708
5709 if (GET_CODE (PATTERN (insn)) == CLOBBER
5710 || GET_CODE (PATTERN (insn)) == USE
5711 || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5712 || GET_CODE (PATTERN (dep_insn)) == USE
5713 /* @@@ Not accurate for indirect calls. */
5714 || GET_CODE (insn) == CALL_INSN
5715 || ia64_safe_type (insn) == TYPE_S)
5716 return 0;
5717
5718 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5719 || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5720 return 0;
5721
5722 dep_type = ia64_safe_type (dep_insn);
5723 dep_class = ia64_safe_itanium_class (dep_insn);
5724 insn_class = ia64_safe_itanium_class (insn);
5725
5726 /* Compares that feed a conditional branch can execute in the same
5727 cycle. */
5728 dep_set = ia64_single_set (dep_insn);
5729 set = ia64_single_set (insn);
5730
5731 if (dep_type != TYPE_F
5732 && dep_set
5733 && GET_CODE (SET_DEST (dep_set)) == REG
5734 && PR_REG (REGNO (SET_DEST (dep_set)))
5735 && GET_CODE (insn) == JUMP_INSN)
5736 return 0;
5737
5738 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5739 {
5740 /* ??? Can't find any information in the documentation about whether
5741 a sequence
5742 st [rx] = ra
5743 ld rb = [ry]
5744 splits issue. Assume it doesn't. */
5745 return 0;
5746 }
5747
5748 src = set ? SET_SRC (set) : 0;
5749 addr = 0;
5750 if (set)
5751 {
5752 if (GET_CODE (SET_DEST (set)) == MEM)
5753 addr = XEXP (SET_DEST (set), 0);
5754 else if (GET_CODE (SET_DEST (set)) == SUBREG
5755 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5756 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5757 else
5758 {
5759 addr = src;
5760 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5761 addr = XVECEXP (addr, 0, 0);
5762 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5763 addr = XEXP (addr, 0);
5764
5765 /* Note that LO_SUM is used for GOT loads. */
5766 if (GET_CODE (addr) == MEM || GET_CODE (addr) == LO_SUM)
5767 addr = XEXP (addr, 0);
5768 else
5769 addr = 0;
5770 }
5771 }
5772
5773 if (addr && GET_CODE (addr) == POST_MODIFY)
5774 addr = XEXP (addr, 0);
5775
5776 set = ia64_single_set (dep_insn);
5777
5778 if ((dep_class == ITANIUM_CLASS_IALU
5779 || dep_class == ITANIUM_CLASS_ILOG
5780 || dep_class == ITANIUM_CLASS_LD)
5781 && (insn_class == ITANIUM_CLASS_LD
5782 || insn_class == ITANIUM_CLASS_ST))
5783 {
5784 if (! addr || ! set)
5785 abort ();
5786 /* This isn't completely correct - an IALU that feeds an address has
5787 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5788 otherwise. Unfortunately there's no good way to describe this. */
5789 if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5790 return cost + 1;
5791 }
5792
5793 if ((dep_class == ITANIUM_CLASS_IALU
5794 || dep_class == ITANIUM_CLASS_ILOG
5795 || dep_class == ITANIUM_CLASS_LD)
5796 && (insn_class == ITANIUM_CLASS_MMMUL
5797 || insn_class == ITANIUM_CLASS_MMSHF
5798 || insn_class == ITANIUM_CLASS_MMSHFI))
5799 return 3;
5800
5801 if (dep_class == ITANIUM_CLASS_FMAC
5802 && (insn_class == ITANIUM_CLASS_FMISC
5803 || insn_class == ITANIUM_CLASS_FCVTFX
5804 || insn_class == ITANIUM_CLASS_XMPY))
5805 return 7;
5806
5807 if ((dep_class == ITANIUM_CLASS_FMAC
5808 || dep_class == ITANIUM_CLASS_FMISC
5809 || dep_class == ITANIUM_CLASS_FCVTFX
5810 || dep_class == ITANIUM_CLASS_XMPY)
5811 && insn_class == ITANIUM_CLASS_STF)
5812 return 8;
5813
5814 /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5815 but HP engineers say any non-MM operation. */
5816 if ((dep_class == ITANIUM_CLASS_MMMUL
5817 || dep_class == ITANIUM_CLASS_MMSHF
5818 || dep_class == ITANIUM_CLASS_MMSHFI)
5819 && insn_class != ITANIUM_CLASS_MMMUL
5820 && insn_class != ITANIUM_CLASS_MMSHF
5821 && insn_class != ITANIUM_CLASS_MMSHFI)
5822 return 4;
5823
5824 return cost;
5825 }
5826
5827 /* Describe the current state of the Itanium pipeline. */
5828 static struct
5829 {
5830 /* The first slot that is used in the current cycle. */
5831 int first_slot;
5832 /* The next slot to fill. */
5833 int cur;
5834 /* The packet we have selected for the current issue window. */
5835 const struct ia64_packet *packet;
5836 /* The position of the split issue that occurs due to issue width
5837 limitations (6 if there's no split issue). */
5838 int split;
5839 /* Record data about the insns scheduled so far in the same issue
5840 window. The elements up to but not including FIRST_SLOT belong
5841 to the previous cycle, the ones starting with FIRST_SLOT belong
5842 to the current cycle. */
5843 enum attr_type types[6];
5844 rtx insns[6];
5845 int stopbit[6];
5846 /* Nonzero if we decided to schedule a stop bit. */
5847 int last_was_stop;
5848 } sched_data;
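/* For example (illustrative): FIRST_SLOT == 3 and CUR == 5 means that the
   first bundle of the issue window was filled in an earlier cycle and that
   slots 3 and 4 hold the insns scheduled so far in the current cycle.  */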
5849
5850 /* Temporary arrays; they have enough elements to hold all insns that
5851 can be ready at the same time while scheduling the current block.
5852 SCHED_READY can hold ready insns, SCHED_TYPES their types. */
5853 static rtx *sched_ready;
5854 static enum attr_type *sched_types;
5855
5856 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5857 of packet P. */
5858
5859 static int
5860 insn_matches_slot (p, itype, slot, insn)
5861 const struct ia64_packet *p;
5862 enum attr_type itype;
5863 int slot;
5864 rtx insn;
5865 {
5866 enum attr_itanium_requires_unit0 u0;
5867 enum attr_type stype = p->t[slot];
5868
5869 if (insn)
5870 {
5871 u0 = ia64_safe_itanium_requires_unit0 (insn);
5872 if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5873 {
5874 int i;
5875 for (i = sched_data.first_slot; i < slot; i++)
5876 if (p->t[i] == stype
5877 || (stype == TYPE_F && p->t[i] == TYPE_L)
5878 || (stype == TYPE_I && p->t[i] == TYPE_X))
5879 return 0;
5880 }
5881 if (GET_CODE (insn) == CALL_INSN)
5882 {
5883 /* Reject calls in multiway branch packets. We want to limit
5884 the number of multiway branches we generate (since the branch
5885 predictor is limited), and this seems to work fairly well.
5886 (If we didn't do this, we'd have to add another test here to
5887 force calls into the third slot of the bundle.) */
5888 if (slot < 3)
5889 {
5890 if (p->t[1] == TYPE_B)
5891 return 0;
5892 }
5893 else
5894 {
5895 if (p->t[4] == TYPE_B)
5896 return 0;
5897 }
5898 }
5899 }
5900
5901 if (itype == stype)
5902 return 1;
5903 if (itype == TYPE_A)
5904 return stype == TYPE_M || stype == TYPE_I;
5905 return 0;
5906 }
5907
5908 /* Like emit_insn_before, but skip cycle_display notes.
5909 ??? When cycle display notes are implemented, update this. */
5910
5911 static void
5912 ia64_emit_insn_before (insn, before)
5913 rtx insn, before;
5914 {
5915 emit_insn_before (insn, before);
5916 }
5917
5918 /* When rotating a bundle out of the issue window, insert a bundle selector
5919 insn in front of it. DUMP is the scheduling dump file or NULL. START
5920 is either 0 or 3, depending on whether we want to emit a bundle selector
5921 for the first bundle or the second bundle in the current issue window.
5922
5923 The selector insns are emitted this late because the selected packet can
5924 be changed until parts of it get rotated out. */
5925
5926 static void
5927 finish_last_head (dump, start)
5928 FILE *dump;
5929 int start;
5930 {
5931 const struct ia64_packet *p = sched_data.packet;
5932 const struct bundle *b = start == 0 ? p->t1 : p->t2;
5933 int bundle_type = b - bundle;
5934 rtx insn;
5935 int i;
5936
5937 if (! ia64_final_schedule)
5938 return;
5939
5940 for (i = start; sched_data.insns[i] == 0; i++)
5941 if (i == start + 3)
5942 abort ();
5943 insn = sched_data.insns[i];
5944
5945 if (dump)
5946 fprintf (dump, "// Emitting template before %d: %s\n",
5947 INSN_UID (insn), b->name);
5948
5949 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5950 }
5951
5952 /* We can't schedule more insns this cycle. Fix up the scheduling state
5953 and advance FIRST_SLOT and CUR.
5954 We have to distribute the insns that are currently found between
5955 FIRST_SLOT and CUR into the slots of the packet we have selected. So
5956 far, they are stored successively in the fields starting at FIRST_SLOT;
5957 now they must be moved to the correct slots.
5958 DUMP is the current scheduling dump file, or NULL. */
5959
5960 static void
5961 cycle_end_fill_slots (dump)
5962 FILE *dump;
5963 {
5964 const struct ia64_packet *packet = sched_data.packet;
5965 int slot, i;
5966 enum attr_type tmp_types[6];
5967 rtx tmp_insns[6];
5968
5969 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5970 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5971
5972 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5973 {
5974 enum attr_type t = tmp_types[i];
5975 if (t != ia64_safe_type (tmp_insns[i]))
5976 abort ();
5977 while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5978 {
5979 if (slot > sched_data.split)
5980 abort ();
5981 if (dump)
5982 fprintf (dump, "// Packet needs %s, have %s\n",
5983 type_names[packet->t[slot]], type_names[t]);
5984 sched_data.types[slot] = packet->t[slot];
5985 sched_data.insns[slot] = 0;
5986 sched_data.stopbit[slot] = 0;
5987
5988 /* ??? TYPE_L instructions always fill up two slots, but we don't
5989 support TYPE_L nops. */
5990 if (packet->t[slot] == TYPE_L)
5991 abort ();
5992
5993 slot++;
5994 }
5995
5996 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the
5997 actual slot type later. */
5998 sched_data.types[slot] = packet->t[slot];
5999 sched_data.insns[slot] = tmp_insns[i];
6000 sched_data.stopbit[slot] = 0;
6001 slot++;
6002
6003 /* TYPE_L instructions always fill up two slots. */
6004 if (t == TYPE_L)
6005 {
6006 sched_data.types[slot] = packet->t[slot];
6007 sched_data.insns[slot] = 0;
6008 sched_data.stopbit[slot] = 0;
6009 slot++;
6010 }
6011 }
6012
6013 /* This isn't right - there's no need to pad out until the forced split;
6014 the CPU will automatically split if an insn isn't ready. */
6015 #if 0
6016 while (slot < sched_data.split)
6017 {
6018 sched_data.types[slot] = packet->t[slot];
6019 sched_data.insns[slot] = 0;
6020 sched_data.stopbit[slot] = 0;
6021 slot++;
6022 }
6023 #endif
6024
6025 sched_data.first_slot = sched_data.cur = slot;
6026 }
6027
6028 /* Bundle rotations, as described in the Itanium optimization manual.
6029 We can rotate either one or both bundles out of the issue window.
6030 DUMP is the current scheduling dump file, or NULL. */
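/* (Rotating one bundle shifts slots 3..5 down to slots 0..2 and re-bases
   the selected packet so that its first bundle is the old second bundle;
   rotating two bundles simply empties the issue window.)  */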
6031
6032 static void
6033 rotate_one_bundle (dump)
6034 FILE *dump;
6035 {
6036 if (dump)
6037 fprintf (dump, "// Rotating one bundle.\n");
6038
6039 finish_last_head (dump, 0);
6040 if (sched_data.cur > 3)
6041 {
6042 sched_data.cur -= 3;
6043 sched_data.first_slot -= 3;
6044 memmove (sched_data.types,
6045 sched_data.types + 3,
6046 sched_data.cur * sizeof *sched_data.types);
6047 memmove (sched_data.stopbit,
6048 sched_data.stopbit + 3,
6049 sched_data.cur * sizeof *sched_data.stopbit);
6050 memmove (sched_data.insns,
6051 sched_data.insns + 3,
6052 sched_data.cur * sizeof *sched_data.insns);
6053 sched_data.packet
6054 = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
6055 }
6056 else
6057 {
6058 sched_data.cur = 0;
6059 sched_data.first_slot = 0;
6060 }
6061 }
6062
6063 static void
6064 rotate_two_bundles (dump)
6065 FILE *dump;
6066 {
6067 if (dump)
6068 fprintf (dump, "// Rotating two bundles.\n");
6069
6070 if (sched_data.cur == 0)
6071 return;
6072
6073 finish_last_head (dump, 0);
6074 if (sched_data.cur > 3)
6075 finish_last_head (dump, 3);
6076 sched_data.cur = 0;
6077 sched_data.first_slot = 0;
6078 }
6079
6080 /* We're beginning a new block. Initialize data structures as necessary. */
6081
6082 static void
6083 ia64_sched_init (dump, sched_verbose, max_ready)
6084 FILE *dump ATTRIBUTE_UNUSED;
6085 int sched_verbose ATTRIBUTE_UNUSED;
6086 int max_ready;
6087 {
6088 static int initialized = 0;
6089
6090 if (! initialized)
6091 {
6092 int b1, b2, i;
6093
6094 initialized = 1;
6095
6096 for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
6097 {
6098 const struct bundle *t1 = bundle + b1;
6099 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
6100 {
6101 const struct bundle *t2 = bundle + b2;
6102
6103 packets[i].t1 = t1;
6104 packets[i].t2 = t2;
6105 }
6106 }
6107 for (i = 0; i < NR_PACKETS; i++)
6108 {
6109 int j;
6110 for (j = 0; j < 3; j++)
6111 packets[i].t[j] = packets[i].t1->t[j];
6112 for (j = 0; j < 3; j++)
6113 packets[i].t[j + 3] = packets[i].t2->t[j];
6114 packets[i].first_split = itanium_split_issue (packets + i, 0);
6115 }
6116
6117 }
6118
6119 init_insn_group_barriers ();
6120
6121 memset (&sched_data, 0, sizeof sched_data);
6122 sched_types = (enum attr_type *) xmalloc (max_ready
6123 * sizeof (enum attr_type));
6124 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
6125 }
6126
6127 /* See if the packet P can match the insns we have already scheduled. Return
6128 nonzero if so. In *PSLOT, we store the first slot that is available for
6129 more instructions if we choose this packet.
6130 SPLIT is the first slot of the split issue; only slots before it can be
6131 used, since scheduling beyond it would cause us to use more than one cycle. */
6132
6133 static int
6134 packet_matches_p (p, split, pslot)
6135 const struct ia64_packet *p;
6136 int split;
6137 int *pslot;
6138 {
6139 int filled = sched_data.cur;
6140 int first = sched_data.first_slot;
6141 int i, slot;
6142
6143 /* First, check if the first of the two bundles must be a specific one (due
6144 to stop bits). */
6145 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
6146 return 0;
6147 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
6148 return 0;
6149
6150 for (i = 0; i < first; i++)
6151 if (! insn_matches_slot (p, sched_data.types[i], i,
6152 sched_data.insns[i]))
6153 return 0;
6154 for (i = slot = first; i < filled; i++)
6155 {
6156 while (slot < split)
6157 {
6158 if (insn_matches_slot (p, sched_data.types[i], slot,
6159 sched_data.insns[i]))
6160 break;
6161 slot++;
6162 }
6163 if (slot == split)
6164 return 0;
6165 slot++;
6166 }
6167
6168 if (pslot)
6169 *pslot = slot;
6170 return 1;
6171 }
6172
6173 /* A frontend for itanium_split_issue. For a packet P and a slot
6174 number FIRST that describes the start of the current clock cycle,
6175 return the slot number of the first split issue. This function
6176 uses the cached number found in P if possible. */
6177
6178 static int
6179 get_split (p, first)
6180 const struct ia64_packet *p;
6181 int first;
6182 {
6183 if (first == 0)
6184 return p->first_split;
6185 return itanium_split_issue (p, first);
6186 }
6187
6188 /* Given N_READY insns in the array READY, whose types are found in the
6189 corresponding array TYPES, return the insn that is best suited to be
6190 scheduled in slot SLOT of packet P. */
6191
6192 static int
6193 find_best_insn (ready, types, n_ready, p, slot)
6194 rtx *ready;
6195 enum attr_type *types;
6196 int n_ready;
6197 const struct ia64_packet *p;
6198 int slot;
6199 {
6200 int best = -1;
6201 int best_pri = 0;
6202 while (n_ready-- > 0)
6203 {
6204 rtx insn = ready[n_ready];
6205 if (! insn)
6206 continue;
6207 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
6208 break;
6209 /* If we have equally good insns, one of which has a stricter
6210 slot requirement, prefer the one with the stricter requirement. */
6211 if (best >= 0 && types[n_ready] == TYPE_A)
6212 continue;
6213 if (insn_matches_slot (p, types[n_ready], slot, insn))
6214 {
6215 best = n_ready;
6216 best_pri = INSN_PRIORITY (ready[best]);
6217
6218 /* If there's no way we could get a stricter requirement, stop
6219 looking now. */
6220 if (types[n_ready] != TYPE_A
6221 && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
6222 break;
6223 break;
6224 }
6225 }
6226 return best;
6227 }
6228
6229 /* Select the best packet to use given the current scheduler state and the
6230 current ready list.
6231 READY is an array holding N_READY ready insns; TYPES is a corresponding
6232 array that holds their types. Store the best packet in *PPACKET and the
6233 number of insns that can be scheduled in the current cycle in *PBEST. */
6234
6235 static void
6236 find_best_packet (pbest, ppacket, ready, types, n_ready)
6237 int *pbest;
6238 const struct ia64_packet **ppacket;
6239 rtx *ready;
6240 enum attr_type *types;
6241 int n_ready;
6242 {
6243 int first = sched_data.first_slot;
6244 int best = 0;
6245 int lowest_end = 6;
6246 const struct ia64_packet *best_packet = NULL;
6247 int i;
6248
6249 for (i = 0; i < NR_PACKETS; i++)
6250 {
6251 const struct ia64_packet *p = packets + i;
6252 int slot;
6253 int split = get_split (p, first);
6254 int win = 0;
6255 int first_slot, last_slot;
6256 int b_nops = 0;
6257
6258 if (! packet_matches_p (p, split, &first_slot))
6259 continue;
6260
6261 memcpy (sched_ready, ready, n_ready * sizeof (rtx));
6262
6263 win = 0;
6264 last_slot = 6;
6265 for (slot = first_slot; slot < split; slot++)
6266 {
6267 int insn_nr;
6268
6269 /* Disallow a degenerate case where the first bundle doesn't
6270 contain anything but NOPs! */
6271 if (first_slot == 0 && win == 0 && slot == 3)
6272 {
6273 win = -1;
6274 break;
6275 }
6276
6277 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
6278 if (insn_nr >= 0)
6279 {
6280 sched_ready[insn_nr] = 0;
6281 last_slot = slot;
6282 win++;
6283 }
6284 else if (p->t[slot] == TYPE_B)
6285 b_nops++;
6286 }
6287 /* We must disallow MBB/BBB packets if any of their B slots would be
6288 filled with nops. */
6289 if (last_slot < 3)
6290 {
6291 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
6292 win = -1;
6293 }
6294 else
6295 {
6296 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
6297 win = -1;
6298 }
6299
6300 if (win > best
6301 || (win == best && last_slot < lowest_end))
6302 {
6303 best = win;
6304 lowest_end = last_slot;
6305 best_packet = p;
6306 }
6307 }
6308 *pbest = best;
6309 *ppacket = best_packet;
6310 }
6311
6312 /* Reorder the ready list so that the insns that can be issued in this cycle
6313 are found in the correct order at the end of the list.
6314 DUMP is the scheduling dump file, or NULL. READY points to the start,
6315 E_READY to the end of the ready list. MAY_FAIL determines what should be
6316 done if no insns can be scheduled in this cycle: if it is zero, we abort,
6317 otherwise we return 0.
6318 Return 1 if any insns can be scheduled in this cycle. */
6319
6320 static int
6321 itanium_reorder (dump, ready, e_ready, may_fail)
6322 FILE *dump;
6323 rtx *ready;
6324 rtx *e_ready;
6325 int may_fail;
6326 {
6327 const struct ia64_packet *best_packet;
6328 int n_ready = e_ready - ready;
6329 int first = sched_data.first_slot;
6330 int i, best, best_split, filled;
6331
6332 for (i = 0; i < n_ready; i++)
6333 sched_types[i] = ia64_safe_type (ready[i]);
6334
6335 find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
6336
6337 if (best == 0)
6338 {
6339 if (may_fail)
6340 return 0;
6341 abort ();
6342 }
6343
6344 if (dump)
6345 {
6346 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
6347 best_packet->t1->name,
6348 best_packet->t2 ? best_packet->t2->name : NULL, best);
6349 }
6350
6351 best_split = itanium_split_issue (best_packet, first);
6352 packet_matches_p (best_packet, best_split, &filled);
6353
6354 for (i = filled; i < best_split; i++)
6355 {
6356 int insn_nr;
6357
6358 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
6359 if (insn_nr >= 0)
6360 {
6361 rtx insn = ready[insn_nr];
6362 memmove (ready + insn_nr, ready + insn_nr + 1,
6363 (n_ready - insn_nr - 1) * sizeof (rtx));
6364 memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
6365 (n_ready - insn_nr - 1) * sizeof (enum attr_type));
6366 ready[--n_ready] = insn;
6367 }
6368 }
6369
6370 sched_data.packet = best_packet;
6371 sched_data.split = best_split;
6372 return 1;
6373 }
6374
6375 /* Dump information about the current scheduling state to file DUMP. */
6376
6377 static void
6378 dump_current_packet (dump)
6379 FILE *dump;
6380 {
6381 int i;
6382 fprintf (dump, "// %d slots filled:", sched_data.cur);
6383 for (i = 0; i < sched_data.first_slot; i++)
6384 {
6385 rtx insn = sched_data.insns[i];
6386 fprintf (dump, " %s", type_names[sched_data.types[i]]);
6387 if (insn)
6388 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
6389 if (sched_data.stopbit[i])
6390 fprintf (dump, " ;;");
6391 }
6392 fprintf (dump, " :::");
6393 for (i = sched_data.first_slot; i < sched_data.cur; i++)
6394 {
6395 rtx insn = sched_data.insns[i];
6396 enum attr_type t = ia64_safe_type (insn);
6397 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
6398 }
6399 fprintf (dump, "\n");
6400 }
6401
6402 /* Schedule a stop bit. DUMP is the current scheduling dump file, or
6403 NULL. */
6404
6405 static void
6406 schedule_stop (dump)
6407 FILE *dump;
6408 {
6409 const struct ia64_packet *best = sched_data.packet;
6410 int i;
6411 int best_stop = 6;
6412
6413 if (dump)
6414 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
6415
6416 if (sched_data.cur == 0)
6417 {
6418 if (dump)
6419 fprintf (dump, "// At start of bundle, so nothing to do.\n");
6420
6421 rotate_two_bundles (NULL);
6422 return;
6423 }
6424
6425 for (i = -1; i < NR_PACKETS; i++)
6426 {
6427 /* This is a slight hack to give the current packet the first chance.
6428 This is done to avoid e.g. switching from MIB to MBB bundles. */
6429 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6430 int split = get_split (p, sched_data.first_slot);
6431 const struct bundle *compare;
6432 int next, stoppos;
6433
6434 if (! packet_matches_p (p, split, &next))
6435 continue;
6436
6437 compare = next > 3 ? p->t2 : p->t1;
6438
6439 stoppos = 3;
6440 if (compare->possible_stop)
6441 stoppos = compare->possible_stop;
6442 if (next > 3)
6443 stoppos += 3;
6444
6445 if (stoppos < next || stoppos >= best_stop)
6446 {
6447 if (compare->possible_stop == 0)
6448 continue;
6449 stoppos = (next > 3 ? 6 : 3);
6450 }
6451 if (stoppos < next || stoppos >= best_stop)
6452 continue;
6453
6454 if (dump)
6455 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
6456 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6457 stoppos);
6458
6459 best_stop = stoppos;
6460 best = p;
6461 }
6462
6463 sched_data.packet = best;
6464 cycle_end_fill_slots (dump);
6465 while (sched_data.cur < best_stop)
6466 {
6467 sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6468 sched_data.insns[sched_data.cur] = 0;
6469 sched_data.stopbit[sched_data.cur] = 0;
6470 sched_data.cur++;
6471 }
6472 sched_data.stopbit[sched_data.cur - 1] = 1;
6473 sched_data.first_slot = best_stop;
6474
6475 if (dump)
6476 dump_current_packet (dump);
6477 }
6478
6479 /* If necessary, perform one or two rotations on the scheduling state.
6480 This should only be called if we are starting a new cycle. */
6481
6482 static void
6483 maybe_rotate (dump)
6484 FILE *dump;
6485 {
6486 cycle_end_fill_slots (dump);
6487 if (sched_data.cur == 6)
6488 rotate_two_bundles (dump);
6489 else if (sched_data.cur >= 3)
6490 rotate_one_bundle (dump);
6491 sched_data.first_slot = sched_data.cur;
6492 }
6493
6494 /* The clock cycle when ia64_sched_reorder was last called. */
6495 static int prev_cycle;
6496
6497 /* The first insn scheduled in the previous cycle. This is the saved
6498 value of sched_data.first_slot. */
6499 static int prev_first;
6500
6501 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to
6502 pad out the delay between MM (shifts, etc.) and integer operations. */
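/* For example (illustrative): if the consumer of an MM result cannot issue
   for another two cycles, the current bundle is padded with NOPs and a stop
   bit, and the remaining idle cycles are filled by the loop below with .mii
   bundles of NOPs terminated by stop bits.  */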
6503
6504 static void
6505 nop_cycles_until (clock_var, dump)
6506 int clock_var;
6507 FILE *dump;
6508 {
6509 int prev_clock = prev_cycle;
6510 int cycles_left = clock_var - prev_clock;
6511 bool did_stop = false;
6512
6513 /* Finish the previous cycle; pad it out with NOPs. */
6514 if (sched_data.cur == 3)
6515 {
6516 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6517 did_stop = true;
6518 maybe_rotate (dump);
6519 }
6520 else if (sched_data.cur > 0)
6521 {
6522 int need_stop = 0;
6523 int split = itanium_split_issue (sched_data.packet, prev_first);
6524
6525 if (sched_data.cur < 3 && split > 3)
6526 {
6527 split = 3;
6528 need_stop = 1;
6529 }
6530
6531 if (split > sched_data.cur)
6532 {
6533 int i;
6534 for (i = sched_data.cur; i < split; i++)
6535 {
6536 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6537 sched_data.types[i] = sched_data.packet->t[i];
6538 sched_data.insns[i] = t;
6539 sched_data.stopbit[i] = 0;
6540 }
6541 sched_data.cur = split;
6542 }
6543
6544 if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6545 && cycles_left > 1)
6546 {
6547 int i;
6548 for (i = sched_data.cur; i < 6; i++)
6549 {
6550 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6551 sched_data.types[i] = sched_data.packet->t[i];
6552 sched_data.insns[i] = t;
6553 sched_data.stopbit[i] = 0;
6554 }
6555 sched_data.cur = 6;
6556 cycles_left--;
6557 need_stop = 1;
6558 }
6559
6560 if (need_stop || sched_data.cur == 6)
6561 {
6562 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6563 did_stop = true;
6564 }
6565 maybe_rotate (dump);
6566 }
6567
6568 cycles_left--;
6569 while (cycles_left > 0)
6570 {
6571 sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6572 sched_emit_insn (gen_nop_type (TYPE_M));
6573 sched_emit_insn (gen_nop_type (TYPE_I));
6574 if (cycles_left > 1)
6575 {
6576 sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6577 cycles_left--;
6578 }
6579 sched_emit_insn (gen_nop_type (TYPE_I));
6580 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6581 did_stop = true;
6582 cycles_left--;
6583 }
6584
6585 if (did_stop)
6586 init_insn_group_barriers ();
6587 }
6588
6589 /* We are about to begin issuing insns for this clock cycle.
6590 Override the default sort algorithm to better slot instructions. */
6591
6592 static int
6593 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6594 reorder_type, clock_var)
6595 FILE *dump ATTRIBUTE_UNUSED;
6596 int sched_verbose ATTRIBUTE_UNUSED;
6597 rtx *ready;
6598 int *pn_ready;
6599 int reorder_type, clock_var;
6600 {
6601 int n_asms;
6602 int n_ready = *pn_ready;
6603 rtx *e_ready = ready + n_ready;
6604 rtx *insnp;
6605
6606 if (sched_verbose)
6607 {
6608 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6609 dump_current_packet (dump);
6610 }
6611
6612 /* Work around the pipeline flush that will occur if the results of
6613 an MM instruction are accessed before the result is ready. Intel
6614 documentation says this only happens with IALU, ISHF, ILOG, LD,
6615 and ST consumers, but experimental evidence shows that *any* non-MM
6616 type instruction will incur the flush. */
6617 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6618 {
6619 for (insnp = ready; insnp < e_ready; insnp++)
6620 {
6621 rtx insn = *insnp, link;
6622 enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6623
6624 if (t == ITANIUM_CLASS_MMMUL
6625 || t == ITANIUM_CLASS_MMSHF
6626 || t == ITANIUM_CLASS_MMSHFI)
6627 continue;
6628
6629 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6630 if (REG_NOTE_KIND (link) == 0)
6631 {
6632 rtx other = XEXP (link, 0);
6633 enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6634 if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
6635 {
6636 nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6637 goto out;
6638 }
6639 }
6640 }
6641 }
6642 out:
6643
6644 prev_first = sched_data.first_slot;
6645 prev_cycle = clock_var;
6646
6647 if (reorder_type == 0)
6648 maybe_rotate (sched_verbose ? dump : NULL);
6649
6650 /* First, move all USEs, CLOBBERs and other crud out of the way. */
6651 n_asms = 0;
6652 for (insnp = ready; insnp < e_ready; insnp++)
6653 if (insnp < e_ready)
6654 {
6655 rtx insn = *insnp;
6656 enum attr_type t = ia64_safe_type (insn);
6657 if (t == TYPE_UNKNOWN)
6658 {
6659 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6660 || asm_noperands (PATTERN (insn)) >= 0)
6661 {
6662 rtx lowest = ready[n_asms];
6663 ready[n_asms] = insn;
6664 *insnp = lowest;
6665 n_asms++;
6666 }
6667 else
6668 {
6669 rtx highest = ready[n_ready - 1];
6670 ready[n_ready - 1] = insn;
6671 *insnp = highest;
6672 if (ia64_final_schedule && group_barrier_needed_p (insn))
6673 {
6674 schedule_stop (sched_verbose ? dump : NULL);
6675 sched_data.last_was_stop = 1;
6676 maybe_rotate (sched_verbose ? dump : NULL);
6677 }
6678
6679 return 1;
6680 }
6681 }
6682 }
6683 if (n_asms < n_ready)
6684 {
6685 /* Some normal insns to process. Skip the asms. */
6686 ready += n_asms;
6687 n_ready -= n_asms;
6688 }
6689 else if (n_ready > 0)
6690 {
6691 /* Only asm insns left. */
6692 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6693 {
6694 schedule_stop (sched_verbose ? dump : NULL);
6695 sched_data.last_was_stop = 1;
6696 maybe_rotate (sched_verbose ? dump : NULL);
6697 }
6698 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6699 return 1;
6700 }
6701
6702 if (ia64_final_schedule)
6703 {
6704 int nr_need_stop = 0;
6705
6706 for (insnp = ready; insnp < e_ready; insnp++)
6707 if (safe_group_barrier_needed_p (*insnp))
6708 nr_need_stop++;
6709
6710 /* Schedule a stop bit if
6711 - all insns require a stop bit, or
6712 - we are starting a new cycle and _any_ insns require a stop bit.
6713 The reason for the latter is that if our schedule is accurate, then
6714 the additional stop won't decrease performance at this point (since
6715 there's a split issue at this point anyway), but it gives us more
6716 freedom when scheduling the currently ready insns. */
6717 if ((reorder_type == 0 && nr_need_stop)
6718 || (reorder_type == 1 && n_ready == nr_need_stop))
6719 {
6720 schedule_stop (sched_verbose ? dump : NULL);
6721 sched_data.last_was_stop = 1;
6722 maybe_rotate (sched_verbose ? dump : NULL);
6723 if (reorder_type == 1)
6724 return 0;
6725 }
6726 else
6727 {
6728 int deleted = 0;
6729 insnp = e_ready;
6730 /* Move down everything that needs a stop bit, preserving relative
6731 order. */
6732 while (insnp-- > ready + deleted)
6733 while (insnp >= ready + deleted)
6734 {
6735 rtx insn = *insnp;
6736 if (! safe_group_barrier_needed_p (insn))
6737 break;
6738 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6739 *ready = insn;
6740 deleted++;
6741 }
6742 n_ready -= deleted;
6743 ready += deleted;
6744 if (deleted != nr_need_stop)
6745 abort ();
6746 }
6747 }
6748
6749 return itanium_reorder (sched_verbose ? dump : NULL,
6750 ready, e_ready, reorder_type == 1);
6751 }
6752
6753 static int
6754 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6755 FILE *dump;
6756 int sched_verbose;
6757 rtx *ready;
6758 int *pn_ready;
6759 int clock_var;
6760 {
6761 return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6762 pn_ready, 0, clock_var);
6763 }
6764
6765 /* Like ia64_sched_reorder, but called after issuing each insn.
6766 Override the default sort algorithm to better slot instructions. */
6767
6768 static int
6769 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6770 FILE *dump ATTRIBUTE_UNUSED;
6771 int sched_verbose ATTRIBUTE_UNUSED;
6772 rtx *ready;
6773 int *pn_ready;
6774 int clock_var;
6775 {
6776 if (sched_data.last_was_stop)
6777 return 0;
6778
6779 /* Detect one special case and try to optimize it.
6780 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6781 then we can get better code by transforming this to 1.MFB;; 2.MIx. */
6782 if (sched_data.first_slot == 1
6783 && sched_data.stopbit[0]
6784 && ((sched_data.cur == 4
6785 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6786 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6787 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6788 || (sched_data.cur == 3
6789 && (sched_data.types[1] == TYPE_M
6790 || sched_data.types[1] == TYPE_A)
6791 && (sched_data.types[2] != TYPE_M
6792 && sched_data.types[2] != TYPE_I
6793 && sched_data.types[2] != TYPE_A))))
6794
6795 {
6796 int i, best;
6797 rtx stop = sched_data.insns[1];
6798
6799 /* Search backward for the stop bit that must be there. */
6800 while (1)
6801 {
6802 int insn_code;
6803
6804 stop = PREV_INSN (stop);
6805 if (GET_CODE (stop) != INSN)
6806 abort ();
6807 insn_code = recog_memoized (stop);
6808
6809 /* Ignore .pred.rel.mutex.
6810
6811 ??? Update this to ignore cycle display notes too
6812 ??? once those are implemented */
6813 if (insn_code == CODE_FOR_pred_rel_mutex
6814 || insn_code == CODE_FOR_prologue_use)
6815 continue;
6816
6817 if (insn_code == CODE_FOR_insn_group_barrier)
6818 break;
6819 abort ();
6820 }
6821
6822 /* Adjust the stop bit's slot selector. */
6823 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6824 abort ();
6825 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6826
6827 sched_data.stopbit[0] = 0;
6828 sched_data.stopbit[2] = 1;
6829
6830 sched_data.types[5] = sched_data.types[3];
6831 sched_data.types[4] = sched_data.types[2];
6832 sched_data.types[3] = sched_data.types[1];
6833 sched_data.insns[5] = sched_data.insns[3];
6834 sched_data.insns[4] = sched_data.insns[2];
6835 sched_data.insns[3] = sched_data.insns[1];
6836 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6837 sched_data.cur += 2;
6838 sched_data.first_slot = 3;
6839 for (i = 0; i < NR_PACKETS; i++)
6840 {
6841 const struct ia64_packet *p = packets + i;
6842 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6843 {
6844 sched_data.packet = p;
6845 break;
6846 }
6847 }
6848 rotate_one_bundle (sched_verbose ? dump : NULL);
6849
6850 best = 6;
6851 for (i = 0; i < NR_PACKETS; i++)
6852 {
6853 const struct ia64_packet *p = packets + i;
6854 int split = get_split (p, sched_data.first_slot);
6855 int next;
6856
6857 /* Disallow multiway branches here. */
6858 if (p->t[1] == TYPE_B)
6859 continue;
6860
6861 if (packet_matches_p (p, split, &next) && next < best)
6862 {
6863 best = next;
6864 sched_data.packet = p;
6865 sched_data.split = split;
6866 }
6867 }
6868 if (best == 6)
6869 abort ();
6870 }
6871
6872 if (*pn_ready > 0)
6873 {
6874 int more = ia64_internal_sched_reorder (dump, sched_verbose,
6875 ready, pn_ready, 1,
6876 clock_var);
6877 if (more)
6878 return more;
6879 /* Did we schedule a stop? If so, finish this cycle. */
6880 if (sched_data.cur == sched_data.first_slot)
6881 return 0;
6882 }
6883
6884 if (sched_verbose)
6885 fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
6886
6887 cycle_end_fill_slots (sched_verbose ? dump : NULL);
6888 if (sched_verbose)
6889 dump_current_packet (dump);
6890 return 0;
6891 }
6892
6893 /* We are about to issue INSN. Return the number of insns left on the
6894 ready queue that can be issued this cycle. */
6895
6896 static int
6897 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6898 FILE *dump;
6899 int sched_verbose;
6900 rtx insn;
6901 int can_issue_more ATTRIBUTE_UNUSED;
6902 {
6903 enum attr_type t = ia64_safe_type (insn);
6904
6905 if (sched_data.last_was_stop)
6906 {
6907 int t = sched_data.first_slot;
6908 if (t == 0)
6909 t = 3;
6910 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6911 init_insn_group_barriers ();
6912 sched_data.last_was_stop = 0;
6913 }
6914
6915 if (t == TYPE_UNKNOWN)
6916 {
6917 if (sched_verbose)
6918 fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6919 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6920 || asm_noperands (PATTERN (insn)) >= 0)
6921 {
6922 /* This must be some kind of asm. Clear the scheduling state. */
6923 rotate_two_bundles (sched_verbose ? dump : NULL);
6924 if (ia64_final_schedule)
6925 group_barrier_needed_p (insn);
6926 }
6927 return 1;
6928 }
6929
6930 /* This is _not_ just a sanity check. group_barrier_needed_p will update
6931 important state info. Don't delete this test. */
6932 if (ia64_final_schedule
6933 && group_barrier_needed_p (insn))
6934 abort ();
6935
6936 sched_data.stopbit[sched_data.cur] = 0;
6937 sched_data.insns[sched_data.cur] = insn;
6938 sched_data.types[sched_data.cur] = t;
6939
6940 sched_data.cur++;
6941 if (sched_verbose)
6942 fprintf (dump, "// Scheduling insn %d of type %s\n",
6943 INSN_UID (insn), type_names[t]);
6944
6945 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6946 {
6947 schedule_stop (sched_verbose ? dump : NULL);
6948 sched_data.last_was_stop = 1;
6949 }
6950
6951 return 1;
6952 }
6953
6954 /* Free data allocated by ia64_sched_init. */
6955
6956 static void
6957 ia64_sched_finish (dump, sched_verbose)
6958 FILE *dump;
6959 int sched_verbose;
6960 {
6961 if (sched_verbose)
6962 fprintf (dump, "// Finishing schedule.\n");
6963 rotate_two_bundles (NULL);
6964 free (sched_types);
6965 free (sched_ready);
6966 }
6967
6968 /* Emit pseudo-ops for the assembler to describe predicate relations.
6969 At present this assumes that we only consider predicate pairs to
6970 be mutex, and that the assembler can deduce proper values from
6971 straight-line code. */
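/* (Concretely, this emits a .pred.rel.mutex pseudo-op for each live
   predicate pair at a code label, and brackets noreturn conditional calls
   with the safe_across_calls pseudo-ops; see the insn patterns used
   below.)  */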
6972
6973 static void
6974 emit_predicate_relation_info ()
6975 {
6976 basic_block bb;
6977
6978 FOR_EACH_BB_REVERSE (bb)
6979 {
6980 int r;
6981 rtx head = bb->head;
6982
6983 /* We only need such notes at code labels. */
6984 if (GET_CODE (head) != CODE_LABEL)
6985 continue;
6986 if (GET_CODE (NEXT_INSN (head)) == NOTE
6987 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6988 head = NEXT_INSN (head);
6989
6990 for (r = PR_REG (0); r < PR_REG (64); r += 2)
6991 if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6992 {
6993 rtx p = gen_rtx_REG (BImode, r);
6994 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6995 if (head == bb->end)
6996 bb->end = n;
6997 head = n;
6998 }
6999 }
7000
7001 /* Look for conditional calls that do not return, and protect predicate
7002 relations around them. Otherwise the assembler will assume the call
7003 returns, and complain about uses of call-clobbered predicates after
7004 the call. */
7005 FOR_EACH_BB_REVERSE (bb)
7006 {
7007 rtx insn = bb->head;
7008
7009 while (1)
7010 {
7011 if (GET_CODE (insn) == CALL_INSN
7012 && GET_CODE (PATTERN (insn)) == COND_EXEC
7013 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7014 {
7015 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7016 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7017 if (bb->head == insn)
7018 bb->head = b;
7019 if (bb->end == insn)
7020 bb->end = a;
7021 }
7022
7023 if (insn == bb->end)
7024 break;
7025 insn = NEXT_INSN (insn);
7026 }
7027 }
7028 }
7029
7030 /* Generate a NOP instruction of type T. We will never generate L type
7031 nops. */
7032
7033 static rtx
7034 gen_nop_type (t)
7035 enum attr_type t;
7036 {
7037 switch (t)
7038 {
7039 case TYPE_M:
7040 return gen_nop_m ();
7041 case TYPE_I:
7042 return gen_nop_i ();
7043 case TYPE_B:
7044 return gen_nop_b ();
7045 case TYPE_F:
7046 return gen_nop_f ();
7047 case TYPE_X:
7048 return gen_nop_x ();
7049 default:
7050 abort ();
7051 }
7052 }
7053
7054 /* After the last scheduling pass, fill in NOPs. It's easier to do this
7055 here than while scheduling. */
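/* For example (illustrative): an .mfi bundle that only received an M-type
   insn during scheduling is completed here by emitting nop.f and nop.i for
   its two unused slots.  */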
7056
7057 static void
7058 ia64_emit_nops ()
7059 {
7060 rtx insn;
7061 const struct bundle *b = 0;
7062 int bundle_pos = 0;
7063
7064 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7065 {
7066 rtx pat;
7067 enum attr_type t;
7068 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
7069 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
7070 continue;
7071 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
7072 || GET_CODE (insn) == CODE_LABEL)
7073 {
7074 if (b)
7075 while (bundle_pos < 3)
7076 {
7077 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7078 bundle_pos++;
7079 }
7080 if (GET_CODE (insn) != CODE_LABEL)
7081 b = bundle + INTVAL (XVECEXP (pat, 0, 0));
7082 else
7083 b = 0;
7084 bundle_pos = 0;
7085 continue;
7086 }
7087 else if (GET_CODE (pat) == UNSPEC_VOLATILE
7088 && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
7089 {
7090 int t = INTVAL (XVECEXP (pat, 0, 0));
7091 if (b)
7092 while (bundle_pos < t)
7093 {
7094 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7095 bundle_pos++;
7096 }
7097 continue;
7098 }
7099
7100 if (bundle_pos == 3)
7101 b = 0;
7102
7103 if (b && INSN_P (insn))
7104 {
7105 t = ia64_safe_type (insn);
7106 if (asm_noperands (PATTERN (insn)) >= 0
7107 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
7108 {
7109 while (bundle_pos < 3)
7110 {
7111 if (b->t[bundle_pos] != TYPE_L)
7112 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7113 bundle_pos++;
7114 }
7115 continue;
7116 }
7117
7118 if (t == TYPE_UNKNOWN)
7119 continue;
7120 while (bundle_pos < 3)
7121 {
7122 if (t == b->t[bundle_pos]
7123 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
7124 || b->t[bundle_pos] == TYPE_I)))
7125 break;
7126
7127 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7128 bundle_pos++;
7129 }
7130 if (bundle_pos < 3)
7131 bundle_pos++;
7132 }
7133 }
7134 }
7135
7136 /* Perform machine dependent operations on the rtl chain INSNS. */
7137
7138 void
7139 ia64_reorg (insns)
7140 rtx insns;
7141 {
7142 /* We are freeing block_for_insn in the toplev to keep compatibility
7143 with old MDEP_REORGS that are not CFG based. Recompute it now. */
7144 compute_bb_for_insn ();
7145
7146 /* If optimizing, we'll have split before scheduling. */
7147 if (optimize == 0)
7148 split_all_insns (0);
7149
7150 /* ??? update_life_info_in_dirty_blocks fails to terminate during
7151 non-optimizing bootstrap. */
7152 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7153
7154 if (ia64_flag_schedule_insns2)
7155 {
7156 timevar_push (TV_SCHED2);
7157 ia64_final_schedule = 1;
7158 schedule_ebbs (rtl_dump_file);
7159 ia64_final_schedule = 0;
7160 timevar_pop (TV_SCHED2);
7161
7162 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
7163 place as they were during scheduling. */
7164 emit_insn_group_barriers (rtl_dump_file, insns);
7165 ia64_emit_nops ();
7166 }
7167 else
7168 emit_all_insn_group_barriers (rtl_dump_file, insns);
7169
7170 /* A call must not be the last instruction in a function, so that the
7171 return address is still within the function and unwinding works
7172 properly. Note that IA-64 differs from dwarf2 on this point. */
7173 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7174 {
7175 rtx insn;
7176 int saw_stop = 0;
7177
7178 insn = get_last_insn ();
7179 if (! INSN_P (insn))
7180 insn = prev_active_insn (insn);
7181 /* Skip over insns that expand to nothing. */
7182 while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7183 {
7184 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7185 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7186 saw_stop = 1;
7187 insn = prev_active_insn (insn);
7188 }
7189 if (GET_CODE (insn) == CALL_INSN)
7190 {
7191 if (! saw_stop)
7192 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7193 emit_insn (gen_break_f ());
7194 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7195 }
7196 }
7197
7198 fixup_errata ();
7199 emit_predicate_relation_info ();
7200 }
7201
7202 /* Return true if REGNO is used by the epilogue. */
7203
7204 int
7205 ia64_epilogue_uses (regno)
7206 int regno;
7207 {
7208 switch (regno)
7209 {
7210 case R_GR (1):
7211 /* With a call to a function in another module, we will write a new
7212 value to "gp". After returning from such a call, we need to make
7213 sure the function restores the original gp-value, even if the
7214 function itself does not use the gp anymore. */
7215 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7216
7217 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7218 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7219 /* For functions defined with the syscall_linkage attribute, all
7220 input registers are marked as live at all function exits. This
7221 prevents the register allocator from using the input registers,
7222 which in turn makes it possible to restart a system call after
7223 an interrupt without having to save/restore the input registers.
7224 This also prevents kernel data from leaking to application code. */
7225 return lookup_attribute ("syscall_linkage",
7226 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7227
7228 case R_BR (0):
7229 /* Conditional return patterns can't represent the use of `b0' as
7230 the return address, so we force the value live this way. */
7231 return 1;
7232
7233 case AR_PFS_REGNUM:
7234 /* Likewise for ar.pfs, which is used by br.ret. */
7235 return 1;
7236
7237 default:
7238 return 0;
7239 }
7240 }
7241
7242 /* Return true if REGNO is used by the frame unwinder. */
7243
7244 int
7245 ia64_eh_uses (regno)
7246 int regno;
7247 {
7248 if (! reload_completed)
7249 return 0;
7250
7251 if (current_frame_info.reg_save_b0
7252 && regno == current_frame_info.reg_save_b0)
7253 return 1;
7254 if (current_frame_info.reg_save_pr
7255 && regno == current_frame_info.reg_save_pr)
7256 return 1;
7257 if (current_frame_info.reg_save_ar_pfs
7258 && regno == current_frame_info.reg_save_ar_pfs)
7259 return 1;
7260 if (current_frame_info.reg_save_ar_unat
7261 && regno == current_frame_info.reg_save_ar_unat)
7262 return 1;
7263 if (current_frame_info.reg_save_ar_lc
7264 && regno == current_frame_info.reg_save_ar_lc)
7265 return 1;
7266
7267 return 0;
7268 }
7269
7270 /* For ia64, SYMBOL_REF_FLAG set means that it is a function.
7271
7272 We add @ to the name if this goes in small data/bss. We can only put
7273 a variable in small data/bss if it is defined in this module or a module
7274 that we are statically linked with. We can't check the second condition,
7275 but TREE_STATIC gives us the first one. */
7276
7277 /* ??? If we had IPA, we could check the second condition. We could support
7278 programmer added section attributes if the variable is not defined in this
7279 module. */
7280
7281 /* ??? See the v850 port for a cleaner way to do this. */
7282
7283 /* ??? We could also support our own long data here, generating movl/add/ld8
7284 instead of addl,ld8/ld8. This makes the code bigger, but should make the
7285 code faster because there is one less load. This also includes incomplete
7286 types which can't go in sdata/sbss. */
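/* A rough sketch of the sequences mentioned above (illustrative only):
   an sdata access is
       addl r2 = @gprel(sym), gp
       ld8  r3 = [r2]
   whereas the generic access goes through the linkage table:
       addl r2 = @ltoff(sym), gp
       ld8  r2 = [r2]
       ld8  r3 = [r2]
   and "own long data" would materialize the full gp-relative offset with
   movl, add it to gp, and then load.  */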
7287
7288 static bool
7289 ia64_in_small_data_p (exp)
7290 tree exp;
7291 {
7292 if (TARGET_NO_SDATA)
7293 return false;
7294
7295 /* Functions are never small data. */
7296 if (TREE_CODE (exp) == FUNCTION_DECL)
7297 return false;
7298
7299 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7300 {
7301 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7302 if (strcmp (section, ".sdata") == 0
7303 || strcmp (section, ".sbss") == 0)
7304 return true;
7305 }
7306 else
7307 {
7308 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7309
7310 /* If this is an incomplete type with size 0, then we can't put it
7311 in sdata because it might be too big when completed. */
7312 if (size > 0 && size <= ia64_section_threshold)
7313 return true;
7314 }
7315
7316 return false;
7317 }
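/* A few illustrative declarations (assuming the default small-data
   threshold is at least the size of an int):

     static int counter;                              // true: small and complete
     static char big_buf[4096];                       // false: over the threshold
     int tbl[8] __attribute__ ((section (".sdata"))); // true: explicit .sdata
     extern struct opaque no_size_yet;                // false: incomplete type

   Only the explicit-section case bypasses the size test above.  */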
7318
7319 static void
7320 ia64_encode_section_info (decl, first)
7321 tree decl;
7322 int first ATTRIBUTE_UNUSED;
7323 {
7324 const char *symbol_str;
7325 bool is_local;
7326 rtx symbol;
7327 char encoding = 0;
7328
7329 if (TREE_CODE (decl) == FUNCTION_DECL)
7330 {
7331 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
7332 return;
7333 }
7334
7335 /* Careful not to prod global register variables. */
7336 if (TREE_CODE (decl) != VAR_DECL
7337 || GET_CODE (DECL_RTL (decl)) != MEM
7338 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
7339 return;
7340
7341 symbol = XEXP (DECL_RTL (decl), 0);
7342 symbol_str = XSTR (symbol, 0);
7343
7344 is_local = (*targetm.binds_local_p) (decl);
7345
7346 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
7347 encoding = " GLil"[decl_tls_model (decl)];
7348 /* Determine if DECL will wind up in .sdata/.sbss. */
7349 else if (is_local && ia64_in_small_data_p (decl))
7350 encoding = 's';
7351
7352 /* Finally, encode this into the symbol string. */
7353 if (encoding)
7354 {
7355 char *newstr;
7356 size_t len;
7357
7358 if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7359 {
7360 if (encoding == symbol_str[1])
7361 return;
7362 /* ??? Sdata became thread or thread became not thread.  Lose. */
7363 if (encoding == 's' || symbol_str[1] == 's')
7364 abort ();
7365 }
7366
7367 len = strlen (symbol_str);
7368 newstr = alloca (len + 3);
7369 newstr[0] = ENCODE_SECTION_INFO_CHAR;
7370 newstr[1] = encoding;
7371 memcpy (newstr + 2, symbol_str, len + 1);
7372
7373 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2);
7374 }
7375
7376 /* This decl is marked as being in small data/bss but it shouldn't be;
7377 one likely explanation for this is that the decl has been moved into
7378 a different section from the one it was in when encode_section_info
7379 was first called. Remove the encoding. */
7380 else if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7381 XSTR (symbol, 0) = ggc_strdup (symbol_str + 2);
7382 }
7383
7384 static const char *
7385 ia64_strip_name_encoding (str)
7386 const char *str;
7387 {
7388 if (str[0] == ENCODE_SECTION_INFO_CHAR)
7389 str += 2;
7390 if (str[0] == '*')
7391 str++;
7392 return str;
7393 }
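/* For example, if ENCODE_SECTION_INFO_CHAR is '@' (as the small-data
   comment above suggests), a symbol encoded as "@ssym" (small data) or
   "@Gsym" (global-dynamic TLS) comes back out of here as plain "sym",
   and a leading '*' added by the generic assembler-name machinery is
   dropped as well.  */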
7394
7395 /* True if it is OK to do sibling call optimization for the specified
7396 call expression EXP. DECL will be the called function, or NULL if
7397 this is an indirect call. */
7398 bool
7399 ia64_function_ok_for_sibcall (decl)
7400 tree decl;
7401 {
7402 /* We can't perform a sibcall if the current function has the syscall_linkage
7403 attribute. */
7404 if (lookup_attribute ("syscall_linkage",
7405 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7406 return false;
7407
7408 /* We must always return with our current GP. This means we can
7409 only sibcall to functions defined in the current module. */
7410 return decl && (*targetm.binds_local_p) (decl);
7411 }
7412
7413 /* Output assembly directives for prologue regions. */
7414
7415 /* The current basic block number. */
7416
7417 static bool last_block;
7418
7419 /* True if we need a copy_state command at the start of the next block. */
7420
7421 static bool need_copy_state;
7422
7423 /* The function emits unwind directives for the start of an epilogue. */
7424
7425 static void
7426 process_epilogue ()
7427 {
7428 /* If this isn't the last block of the function, then we need to label the
7429 current state, and copy it back in at the start of the next block. */
7430
7431 if (!last_block)
7432 {
7433 fprintf (asm_out_file, "\t.label_state 1\n");
7434 need_copy_state = true;
7435 }
7436
7437 fprintf (asm_out_file, "\t.restore sp\n");
7438 }
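/* Thus, for an epilogue that is not in the function's last block, the
   unwind annotations around it look roughly like:

	.label_state 1
	.restore sp
	  ... epilogue instructions and return ...
	.body
	.copy_state 1

   where the .body/.copy_state pair is emitted for the following block
   by process_for_unwind_directive below; the final epilogue gets only
   the ".restore sp".  */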
7439
7440 /* This function processes a SET pattern looking for specific patterns
7441 which result in emitting an assembly directive required for unwinding. */
7442
7443 static int
7444 process_set (asm_out_file, pat)
7445 FILE *asm_out_file;
7446 rtx pat;
7447 {
7448 rtx src = SET_SRC (pat);
7449 rtx dest = SET_DEST (pat);
7450 int src_regno, dest_regno;
7451
7452 /* Look for the ALLOC insn. */
7453 if (GET_CODE (src) == UNSPEC_VOLATILE
7454 && XINT (src, 1) == UNSPECV_ALLOC
7455 && GET_CODE (dest) == REG)
7456 {
7457 dest_regno = REGNO (dest);
7458
7459 /* If this is the final destination for ar.pfs, then this must
7460 be the alloc in the prologue. */
7461 if (dest_regno == current_frame_info.reg_save_ar_pfs)
7462 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7463 ia64_dbx_register_number (dest_regno));
7464 else
7465 {
7466 /* This must be an alloc before a sibcall. We must drop the
7467 old frame info. The easiest way to drop the old frame
7468 info is to ensure we had a ".restore sp" directive
7469 followed by a new prologue. If the procedure doesn't
7470 have a memory-stack frame, we'll issue a dummy ".restore
7471 sp" now. */
7472 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
7473 /* If we haven't done process_epilogue () yet, do it now.  */
7474 process_epilogue ();
7475 fprintf (asm_out_file, "\t.prologue\n");
7476 }
7477 return 1;
7478 }
7479
7480 /* Look for SP = .... */
7481 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7482 {
7483 if (GET_CODE (src) == PLUS)
7484 {
7485 rtx op0 = XEXP (src, 0);
7486 rtx op1 = XEXP (src, 1);
7487 if (op0 == dest && GET_CODE (op1) == CONST_INT)
7488 {
7489 if (INTVAL (op1) < 0)
7490 {
7491 fputs ("\t.fframe ", asm_out_file);
7492 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
7493 -INTVAL (op1));
7494 fputc ('\n', asm_out_file);
7495 }
7496 else
7497 process_epilogue ();
7498 }
7499 else
7500 abort ();
7501 }
7502 else if (GET_CODE (src) == REG
7503 && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7504 process_epilogue ();
7505 else
7506 abort ();
7507
7508 return 1;
7509 }
7510
7511 /* Register move we need to look at. */
7512 if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7513 {
7514 src_regno = REGNO (src);
7515 dest_regno = REGNO (dest);
7516
7517 switch (src_regno)
7518 {
7519 case BR_REG (0):
7520 /* Saving return address pointer. */
7521 if (dest_regno != current_frame_info.reg_save_b0)
7522 abort ();
7523 fprintf (asm_out_file, "\t.save rp, r%d\n",
7524 ia64_dbx_register_number (dest_regno));
7525 return 1;
7526
7527 case PR_REG (0):
7528 if (dest_regno != current_frame_info.reg_save_pr)
7529 abort ();
7530 fprintf (asm_out_file, "\t.save pr, r%d\n",
7531 ia64_dbx_register_number (dest_regno));
7532 return 1;
7533
7534 case AR_UNAT_REGNUM:
7535 if (dest_regno != current_frame_info.reg_save_ar_unat)
7536 abort ();
7537 fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7538 ia64_dbx_register_number (dest_regno));
7539 return 1;
7540
7541 case AR_LC_REGNUM:
7542 if (dest_regno != current_frame_info.reg_save_ar_lc)
7543 abort ();
7544 fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7545 ia64_dbx_register_number (dest_regno));
7546 return 1;
7547
7548 case STACK_POINTER_REGNUM:
7549 if (dest_regno != HARD_FRAME_POINTER_REGNUM
7550 || ! frame_pointer_needed)
7551 abort ();
7552 fprintf (asm_out_file, "\t.vframe r%d\n",
7553 ia64_dbx_register_number (dest_regno));
7554 return 1;
7555
7556 default:
7557 /* Everything else should indicate being stored to memory. */
7558 abort ();
7559 }
7560 }
7561
7562 /* Memory store we need to look at. */
7563 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7564 {
7565 long off;
7566 rtx base;
7567 const char *saveop;
7568
7569 if (GET_CODE (XEXP (dest, 0)) == REG)
7570 {
7571 base = XEXP (dest, 0);
7572 off = 0;
7573 }
7574 else if (GET_CODE (XEXP (dest, 0)) == PLUS
7575 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7576 {
7577 base = XEXP (XEXP (dest, 0), 0);
7578 off = INTVAL (XEXP (XEXP (dest, 0), 1));
7579 }
7580 else
7581 abort ();
7582
7583 if (base == hard_frame_pointer_rtx)
7584 {
7585 saveop = ".savepsp";
7586 off = - off;
7587 }
7588 else if (base == stack_pointer_rtx)
7589 saveop = ".savesp";
7590 else
7591 abort ();
7592
7593 src_regno = REGNO (src);
7594 switch (src_regno)
7595 {
7596 case BR_REG (0):
7597 if (current_frame_info.reg_save_b0 != 0)
7598 abort ();
7599 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7600 return 1;
7601
7602 case PR_REG (0):
7603 if (current_frame_info.reg_save_pr != 0)
7604 abort ();
7605 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7606 return 1;
7607
7608 case AR_LC_REGNUM:
7609 if (current_frame_info.reg_save_ar_lc != 0)
7610 abort ();
7611 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7612 return 1;
7613
7614 case AR_PFS_REGNUM:
7615 if (current_frame_info.reg_save_ar_pfs != 0)
7616 abort ();
7617 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7618 return 1;
7619
7620 case AR_UNAT_REGNUM:
7621 if (current_frame_info.reg_save_ar_unat != 0)
7622 abort ();
7623 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7624 return 1;
7625
7626 case GR_REG (4):
7627 case GR_REG (5):
7628 case GR_REG (6):
7629 case GR_REG (7):
7630 fprintf (asm_out_file, "\t.save.g 0x%x\n",
7631 1 << (src_regno - GR_REG (4)));
7632 return 1;
7633
7634 case BR_REG (1):
7635 case BR_REG (2):
7636 case BR_REG (3):
7637 case BR_REG (4):
7638 case BR_REG (5):
7639 fprintf (asm_out_file, "\t.save.b 0x%x\n",
7640 1 << (src_regno - BR_REG (1)));
7641 return 1;
7642
7643 case FR_REG (2):
7644 case FR_REG (3):
7645 case FR_REG (4):
7646 case FR_REG (5):
7647 fprintf (asm_out_file, "\t.save.f 0x%x\n",
7648 1 << (src_regno - FR_REG (2)));
7649 return 1;
7650
7651 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7652 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7653 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7654 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7655 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7656 1 << (src_regno - FR_REG (12)));
7657 return 1;
7658
7659 default:
7660 return 0;
7661 }
7662 }
7663
7664 return 0;
7665 }
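/* A few concrete examples of the mapping above (r40 stands for whatever
   save register current_frame_info actually chose; register numbers in
   the output go through ia64_dbx_register_number):

     sp = sp + (-160)            ->  .fframe 160
     sp = sp + 160               ->  epilogue: .label_state/.restore sp
     r40 = b0                    ->  .save rp, r40
     mem[sp + 16] = ar.unat      ->  .savesp ar.unat, 16
     mem[sp + 16] = r4           ->  .save.g 0x1
*/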
7666
7667
7668 /* This function looks at a single insn and emits any directives
7669 required to unwind this insn. */
7670 void
7671 process_for_unwind_directive (asm_out_file, insn)
7672 FILE *asm_out_file;
7673 rtx insn;
7674 {
7675 if (flag_unwind_tables
7676 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7677 {
7678 rtx pat;
7679
7680 if (GET_CODE (insn) == NOTE
7681 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7682 {
7683 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7684
7685 /* Restore unwind state from immediately before the epilogue. */
7686 if (need_copy_state)
7687 {
7688 fprintf (asm_out_file, "\t.body\n");
7689 fprintf (asm_out_file, "\t.copy_state 1\n");
7690 need_copy_state = false;
7691 }
7692 }
7693
7694 if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7695 return;
7696
7697 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7698 if (pat)
7699 pat = XEXP (pat, 0);
7700 else
7701 pat = PATTERN (insn);
7702
7703 switch (GET_CODE (pat))
7704 {
7705 case SET:
7706 process_set (asm_out_file, pat);
7707 break;
7708
7709 case PARALLEL:
7710 {
7711 int par_index;
7712 int limit = XVECLEN (pat, 0);
7713 for (par_index = 0; par_index < limit; par_index++)
7714 {
7715 rtx x = XVECEXP (pat, 0, par_index);
7716 if (GET_CODE (x) == SET)
7717 process_set (asm_out_file, x);
7718 }
7719 break;
7720 }
7721
7722 default:
7723 abort ();
7724 }
7725 }
7726 }
7727
7728
7729 void
7730 ia64_init_builtins ()
7731 {
7732 tree psi_type_node = build_pointer_type (integer_type_node);
7733 tree pdi_type_node = build_pointer_type (long_integer_type_node);
7734
7735 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7736 tree si_ftype_psi_si_si
7737 = build_function_type_list (integer_type_node,
7738 psi_type_node, integer_type_node,
7739 integer_type_node, NULL_TREE);
7740
7741 /* __sync_val_compare_and_swap_di */
7742 tree di_ftype_pdi_di_di
7743 = build_function_type_list (long_integer_type_node,
7744 pdi_type_node, long_integer_type_node,
7745 long_integer_type_node, NULL_TREE);
7746 /* __sync_bool_compare_and_swap_di */
7747 tree si_ftype_pdi_di_di
7748 = build_function_type_list (integer_type_node,
7749 pdi_type_node, long_integer_type_node,
7750 long_integer_type_node, NULL_TREE);
7751 /* __sync_synchronize */
7752 tree void_ftype_void
7753 = build_function_type (void_type_node, void_list_node);
7754
7755 /* __sync_lock_test_and_set_si */
7756 tree si_ftype_psi_si
7757 = build_function_type_list (integer_type_node,
7758 psi_type_node, integer_type_node, NULL_TREE);
7759
7760 /* __sync_lock_test_and_set_di */
7761 tree di_ftype_pdi_di
7762 = build_function_type_list (long_integer_type_node,
7763 pdi_type_node, long_integer_type_node,
7764 NULL_TREE);
7765
7766 /* __sync_lock_release_si */
7767 tree void_ftype_psi
7768 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7769
7770 /* __sync_lock_release_di */
7771 tree void_ftype_pdi
7772 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7773
7774 #define def_builtin(name, type, code) \
7775 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7776
7777 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7778 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7779 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7780 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7781 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7782 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7783 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
7784 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7785
7786 def_builtin ("__sync_synchronize", void_ftype_void,
7787 IA64_BUILTIN_SYNCHRONIZE);
7788
7789 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7790 IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7791 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7792 IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7793 def_builtin ("__sync_lock_release_si", void_ftype_psi,
7794 IA64_BUILTIN_LOCK_RELEASE_SI);
7795 def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7796 IA64_BUILTIN_LOCK_RELEASE_DI);
7797
7798 def_builtin ("__builtin_ia64_bsp",
7799 build_function_type (ptr_type_node, void_list_node),
7800 IA64_BUILTIN_BSP);
7801
7802 def_builtin ("__builtin_ia64_flushrs",
7803 build_function_type (void_type_node, void_list_node),
7804 IA64_BUILTIN_FLUSHRS);
7805
7806 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7807 IA64_BUILTIN_FETCH_AND_ADD_SI);
7808 def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7809 IA64_BUILTIN_FETCH_AND_SUB_SI);
7810 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7811 IA64_BUILTIN_FETCH_AND_OR_SI);
7812 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7813 IA64_BUILTIN_FETCH_AND_AND_SI);
7814 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7815 IA64_BUILTIN_FETCH_AND_XOR_SI);
7816 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7817 IA64_BUILTIN_FETCH_AND_NAND_SI);
7818
7819 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7820 IA64_BUILTIN_ADD_AND_FETCH_SI);
7821 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7822 IA64_BUILTIN_SUB_AND_FETCH_SI);
7823 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7824 IA64_BUILTIN_OR_AND_FETCH_SI);
7825 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7826 IA64_BUILTIN_AND_AND_FETCH_SI);
7827 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7828 IA64_BUILTIN_XOR_AND_FETCH_SI);
7829 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7830 IA64_BUILTIN_NAND_AND_FETCH_SI);
7831
7832 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7833 IA64_BUILTIN_FETCH_AND_ADD_DI);
7834 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7835 IA64_BUILTIN_FETCH_AND_SUB_DI);
7836 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7837 IA64_BUILTIN_FETCH_AND_OR_DI);
7838 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7839 IA64_BUILTIN_FETCH_AND_AND_DI);
7840 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7841 IA64_BUILTIN_FETCH_AND_XOR_DI);
7842 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7843 IA64_BUILTIN_FETCH_AND_NAND_DI);
7844
7845 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7846 IA64_BUILTIN_ADD_AND_FETCH_DI);
7847 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7848 IA64_BUILTIN_SUB_AND_FETCH_DI);
7849 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7850 IA64_BUILTIN_OR_AND_FETCH_DI);
7851 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7852 IA64_BUILTIN_AND_AND_FETCH_DI);
7853 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7854 IA64_BUILTIN_XOR_AND_FETCH_DI);
7855 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7856 IA64_BUILTIN_NAND_AND_FETCH_DI);
7857
7858 #undef def_builtin
7859 }
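/* At the source level these expand from calls such as (illustrative):

     int lock;
     int old = __sync_val_compare_and_swap_si (&lock, 0, 1);
     __sync_synchronize ();
     void *bsp = __builtin_ia64_bsp ();

   where the _si variants operate on int through an int pointer and the
   _di variants on long through a long pointer, matching the function
   types built above.  */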
7860
7861 /* Expand fetch_and_op intrinsics. The basic code sequence is:
7862
7863 mf
7864 tmp = [ptr];
7865 do {
7866 ret = tmp;
7867 ar.ccv = tmp;
7868 tmp <op>= value;
7869 cmpxchgsz.acq tmp = [ptr], tmp
7870 } while (tmp != ret)
7871 */
7872
7873 static rtx
7874 ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7875 optab binoptab;
7876 enum machine_mode mode;
7877 tree arglist;
7878 rtx target;
7879 {
7880 rtx ret, label, tmp, ccv, insn, mem, value;
7881 tree arg0, arg1;
7882
7883 arg0 = TREE_VALUE (arglist);
7884 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7885 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7886 #ifdef POINTERS_EXTEND_UNSIGNED
7887 if (GET_MODE(mem) != Pmode)
7888 mem = convert_memory_address (Pmode, mem);
7889 #endif
7890 value = expand_expr (arg1, NULL_RTX, mode, 0);
7891
7892 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7893 MEM_VOLATILE_P (mem) = 1;
7894
7895 if (target && register_operand (target, mode))
7896 ret = target;
7897 else
7898 ret = gen_reg_rtx (mode);
7899
7900 emit_insn (gen_mf ());
7901
7902 /* Special case for fetchadd instructions. */
7903 if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7904 {
7905 if (mode == SImode)
7906 insn = gen_fetchadd_acq_si (ret, mem, value);
7907 else
7908 insn = gen_fetchadd_acq_di (ret, mem, value);
7909 emit_insn (insn);
7910 return ret;
7911 }
7912
7913 tmp = gen_reg_rtx (mode);
7914 /* ar.ccv must always be loaded with a zero-extended DImode value. */
7915 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7916 emit_move_insn (tmp, mem);
7917
7918 label = gen_label_rtx ();
7919 emit_label (label);
7920 emit_move_insn (ret, tmp);
7921 convert_move (ccv, tmp, /*unsignedp=*/1);
7922
7923 /* Perform the specific operation. Special case NAND by noticing
7924 one_cmpl_optab instead. */
7925 if (binoptab == one_cmpl_optab)
7926 {
7927 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7928 binoptab = and_optab;
7929 }
7930 tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7931
7932 if (mode == SImode)
7933 insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7934 else
7935 insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7936 emit_insn (insn);
7937
7938 emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7939
7940 return ret;
7941 }
7942
7943 /* Expand op_and_fetch intrinsics. The basic code sequence is:
7944
7945 mf
7946 tmp = [ptr];
7947 do {
7948 old = tmp;
7949 ar.ccv = tmp;
7950 ret = tmp <op> value;
7951 cmpxchgsz.acq tmp = [ptr], ret
7952 } while (tmp != old)
7953 */
7954
7955 static rtx
7956 ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7957 optab binoptab;
7958 enum machine_mode mode;
7959 tree arglist;
7960 rtx target;
7961 {
7962 rtx old, label, tmp, ret, ccv, insn, mem, value;
7963 tree arg0, arg1;
7964
7965 arg0 = TREE_VALUE (arglist);
7966 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7967 mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7968 #ifdef POINTERS_EXTEND_UNSIGNED
7969 if (GET_MODE(mem) != Pmode)
7970 mem = convert_memory_address (Pmode, mem);
7971 #endif
7972
7973 value = expand_expr (arg1, NULL_RTX, mode, 0);
7974
7975 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7976 MEM_VOLATILE_P (mem) = 1;
7977
7978 if (target && ! register_operand (target, mode))
7979 target = NULL_RTX;
7980
7981 emit_insn (gen_mf ());
7982 tmp = gen_reg_rtx (mode);
7983 old = gen_reg_rtx (mode);
7984 /* ar.ccv must always be loaded with a zero-extended DImode value. */
7985 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7986
7987 emit_move_insn (tmp, mem);
7988
7989 label = gen_label_rtx ();
7990 emit_label (label);
7991 emit_move_insn (old, tmp);
7992 convert_move (ccv, tmp, /*unsignedp=*/1);
7993
7994 /* Perform the specific operation. Special case NAND by noticing
7995 one_cmpl_optab instead. */
7996 if (binoptab == one_cmpl_optab)
7997 {
7998 tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7999 binoptab = and_optab;
8000 }
8001 ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
8002
8003 if (mode == SImode)
8004 insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
8005 else
8006 insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
8007 emit_insn (insn);
8008
8009 emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
8010
8011 return ret;
8012 }
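/* The only difference from ia64_expand_fetch_and_op is which value is
   returned.  With *p initially 5, for example:

     __sync_fetch_and_add_si (p, 1)   returns 5 (the old value);
     __sync_add_and_fetch_si (p, 1)   returns 6 (the new value);

   both leave *p equal to 6.  */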
8013
8014 /* Expand val_ and bool_compare_and_swap. For val_ we want:
8015
8016 ar.ccv = oldval
8017 mf
8018 cmpxchgsz.acq ret = [ptr], newval, ar.ccv
8019 return ret
8020
8021 For bool_ it's the same except return ret == oldval.
8022 */
8023
8024 static rtx
8025 ia64_expand_compare_and_swap (rmode, mode, boolp, arglist, target)
8026 enum machine_mode rmode;
8027 enum machine_mode mode;
8028 int boolp;
8029 tree arglist;
8030 rtx target;
8031 {
8032 tree arg0, arg1, arg2;
8033 rtx mem, old, new, ccv, tmp, insn;
8034
8035 arg0 = TREE_VALUE (arglist);
8036 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8037 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8038 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8039 old = expand_expr (arg1, NULL_RTX, mode, 0);
8040 new = expand_expr (arg2, NULL_RTX, mode, 0);
8041
8042 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8043 MEM_VOLATILE_P (mem) = 1;
8044
8045 if (GET_MODE (old) != mode)
8046 old = convert_to_mode (mode, old, /*unsignedp=*/1);
8047 if (GET_MODE (new) != mode)
8048 new = convert_to_mode (mode, new, /*unsignedp=*/1);
8049
8050 if (! register_operand (old, mode))
8051 old = copy_to_mode_reg (mode, old);
8052 if (! register_operand (new, mode))
8053 new = copy_to_mode_reg (mode, new);
8054
8055 if (! boolp && target && register_operand (target, mode))
8056 tmp = target;
8057 else
8058 tmp = gen_reg_rtx (mode);
8059
8060 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8061 convert_move (ccv, old, /*unsignedp=*/1);
8062 emit_insn (gen_mf ());
8063 if (mode == SImode)
8064 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8065 else
8066 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8067 emit_insn (insn);
8068
8069 if (boolp)
8070 {
8071 if (! target)
8072 target = gen_reg_rtx (rmode);
8073 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8074 }
8075 else
8076 return tmp;
8077 }
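/* In terms of the builtins this implements, with int *p:

     __sync_val_compare_and_swap_si (p, oldval, newval)
	 stores newval into *p only if *p equalled oldval, and returns
	 the previous contents of *p;
     __sync_bool_compare_and_swap_si (p, oldval, newval)
	 does the same conditional store but returns nonzero exactly
	 when the previous contents equalled oldval.  */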
8078
8079 /* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */
8080
8081 static rtx
8082 ia64_expand_lock_test_and_set (mode, arglist, target)
8083 enum machine_mode mode;
8084 tree arglist;
8085 rtx target;
8086 {
8087 tree arg0, arg1;
8088 rtx mem, new, ret, insn;
8089
8090 arg0 = TREE_VALUE (arglist);
8091 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8092 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8093 new = expand_expr (arg1, NULL_RTX, mode, 0);
8094
8095 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8096 MEM_VOLATILE_P (mem) = 1;
8097 if (! register_operand (new, mode))
8098 new = copy_to_mode_reg (mode, new);
8099
8100 if (target && register_operand (target, mode))
8101 ret = target;
8102 else
8103 ret = gen_reg_rtx (mode);
8104
8105 if (mode == SImode)
8106 insn = gen_xchgsi (ret, mem, new);
8107 else
8108 insn = gen_xchgdi (ret, mem, new);
8109 emit_insn (insn);
8110
8111 return ret;
8112 }
8113
8114 /* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. */
8115
8116 static rtx
8117 ia64_expand_lock_release (mode, arglist, target)
8118 enum machine_mode mode;
8119 tree arglist;
8120 rtx target ATTRIBUTE_UNUSED;
8121 {
8122 tree arg0;
8123 rtx mem;
8124
8125 arg0 = TREE_VALUE (arglist);
8126 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8127
8128 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8129 MEM_VOLATILE_P (mem) = 1;
8130
8131 emit_move_insn (mem, const0_rtx);
8132
8133 return const0_rtx;
8134 }
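/* Together, lock_test_and_set and lock_release give a minimal spin lock
   (illustration only):

     static int lock;

     while (__sync_lock_test_and_set_si (&lock, 1) != 0)
       ;                                  // spin until we observed 0
     ... critical section ...
     __sync_lock_release_si (&lock);      // leave the critical section

   The release really is just the "mem = 0" move emitted above.  */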
8135
8136 rtx
8137 ia64_expand_builtin (exp, target, subtarget, mode, ignore)
8138 tree exp;
8139 rtx target;
8140 rtx subtarget ATTRIBUTE_UNUSED;
8141 enum machine_mode mode ATTRIBUTE_UNUSED;
8142 int ignore ATTRIBUTE_UNUSED;
8143 {
8144 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8145 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8146 tree arglist = TREE_OPERAND (exp, 1);
8147 enum machine_mode rmode = VOIDmode;
8148
8149 switch (fcode)
8150 {
8151 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8152 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8153 mode = SImode;
8154 rmode = SImode;
8155 break;
8156
8157 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8158 case IA64_BUILTIN_LOCK_RELEASE_SI:
8159 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8160 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8161 case IA64_BUILTIN_FETCH_AND_OR_SI:
8162 case IA64_BUILTIN_FETCH_AND_AND_SI:
8163 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8164 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8165 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8166 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8167 case IA64_BUILTIN_OR_AND_FETCH_SI:
8168 case IA64_BUILTIN_AND_AND_FETCH_SI:
8169 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8170 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8171 mode = SImode;
8172 break;
8173
8174 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8175 mode = DImode;
8176 rmode = SImode;
8177 break;
8178
8179 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8180 mode = DImode;
8181 rmode = DImode;
8182 break;
8183
8184 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8185 case IA64_BUILTIN_LOCK_RELEASE_DI:
8186 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8187 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8188 case IA64_BUILTIN_FETCH_AND_OR_DI:
8189 case IA64_BUILTIN_FETCH_AND_AND_DI:
8190 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8191 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8192 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8193 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8194 case IA64_BUILTIN_OR_AND_FETCH_DI:
8195 case IA64_BUILTIN_AND_AND_FETCH_DI:
8196 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8197 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8198 mode = DImode;
8199 break;
8200
8201 default:
8202 break;
8203 }
8204
8205 switch (fcode)
8206 {
8207 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8208 case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8209 return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8210 target);
8211
8212 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8213 case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8214 return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8215 target);
8216
8217 case IA64_BUILTIN_SYNCHRONIZE:
8218 emit_insn (gen_mf ());
8219 return const0_rtx;
8220
8221 case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8222 case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8223 return ia64_expand_lock_test_and_set (mode, arglist, target);
8224
8225 case IA64_BUILTIN_LOCK_RELEASE_SI:
8226 case IA64_BUILTIN_LOCK_RELEASE_DI:
8227 return ia64_expand_lock_release (mode, arglist, target);
8228
8229 case IA64_BUILTIN_BSP:
8230 if (! target || ! register_operand (target, DImode))
8231 target = gen_reg_rtx (DImode);
8232 emit_insn (gen_bsp_value (target));
8233 return target;
8234
8235 case IA64_BUILTIN_FLUSHRS:
8236 emit_insn (gen_flushrs ());
8237 return const0_rtx;
8238
8239 case IA64_BUILTIN_FETCH_AND_ADD_SI:
8240 case IA64_BUILTIN_FETCH_AND_ADD_DI:
8241 return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8242
8243 case IA64_BUILTIN_FETCH_AND_SUB_SI:
8244 case IA64_BUILTIN_FETCH_AND_SUB_DI:
8245 return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8246
8247 case IA64_BUILTIN_FETCH_AND_OR_SI:
8248 case IA64_BUILTIN_FETCH_AND_OR_DI:
8249 return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8250
8251 case IA64_BUILTIN_FETCH_AND_AND_SI:
8252 case IA64_BUILTIN_FETCH_AND_AND_DI:
8253 return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8254
8255 case IA64_BUILTIN_FETCH_AND_XOR_SI:
8256 case IA64_BUILTIN_FETCH_AND_XOR_DI:
8257 return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8258
8259 case IA64_BUILTIN_FETCH_AND_NAND_SI:
8260 case IA64_BUILTIN_FETCH_AND_NAND_DI:
8261 return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8262
8263 case IA64_BUILTIN_ADD_AND_FETCH_SI:
8264 case IA64_BUILTIN_ADD_AND_FETCH_DI:
8265 return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8266
8267 case IA64_BUILTIN_SUB_AND_FETCH_SI:
8268 case IA64_BUILTIN_SUB_AND_FETCH_DI:
8269 return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8270
8271 case IA64_BUILTIN_OR_AND_FETCH_SI:
8272 case IA64_BUILTIN_OR_AND_FETCH_DI:
8273 return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8274
8275 case IA64_BUILTIN_AND_AND_FETCH_SI:
8276 case IA64_BUILTIN_AND_AND_FETCH_DI:
8277 return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8278
8279 case IA64_BUILTIN_XOR_AND_FETCH_SI:
8280 case IA64_BUILTIN_XOR_AND_FETCH_DI:
8281 return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8282
8283 case IA64_BUILTIN_NAND_AND_FETCH_SI:
8284 case IA64_BUILTIN_NAND_AND_FETCH_DI:
8285 return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8286
8287 default:
8288 break;
8289 }
8290
8291 return NULL_RTX;
8292 }
8293
8294 /* On HP-UX IA64, aggregate parameters are placed in the
8295 most significant bits of the stack slot. */
8296
8297 enum direction
8298 ia64_hpux_function_arg_padding (mode, type)
8299 enum machine_mode mode;
8300 tree type;
8301 {
8302 /* Exception to normal case for structures/unions/etc. */
8303
8304 if (type && AGGREGATE_TYPE_P (type)
8305 && int_size_in_bytes (type) < UNITS_PER_WORD)
8306 return upward;
8307
8308 /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
8309 hardwired to be true. */
8310
8311 return((mode == BLKmode
8312 ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8313 && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
8314 : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
8315 ? downward : upward);
8316 }
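/* For example, a three-byte aggregate such as

     struct rgb { char r, g, b; };

   is smaller than UNITS_PER_WORD and so is padded upward here, i.e.
   left-justified into the most significant bytes of its 64-bit slot,
   where the generic !BYTES_BIG_ENDIAN rule above would have padded it
   downward.  */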
8317
8318 /* Linked list of all external functions that are to be emitted by GCC.
8319 We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8320 order to avoid putting out names that are never really used. */
8321
8322 struct extern_func_list
8323 {
8324 struct extern_func_list *next; /* next external */
8325 char *name; /* name of the external */
8326 } *extern_func_head = 0;
8327
8328 static void
8329 ia64_hpux_add_extern_decl (name)
8330 const char *name;
8331 {
8332 struct extern_func_list *p;
8333
8334 p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
8335 p->name = xmalloc (strlen (name) + 1);
8336 strcpy(p->name, name);
8337 p->next = extern_func_head;
8338 extern_func_head = p;
8339 }
8340
8341 /* Print out the list of used global functions. */
8342
8343 void
8344 ia64_hpux_asm_file_end (file)
8345 FILE *file;
8346 {
8347 while (extern_func_head)
8348 {
8349 const char *real_name;
8350 tree decl;
8351
8352 real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
8353 decl = maybe_get_identifier (real_name);
8354
8355 if (!decl
8356 || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
8357 {
8358 if (decl)
8359 TREE_ASM_WRITTEN (decl) = 1;
8360 (*targetm.asm_out.globalize_label) (file, extern_func_head->name);
8361 fprintf (file, "%s", TYPE_ASM_OP);
8362 assemble_name (file, extern_func_head->name);
8363 putc (',', file);
8364 fprintf (file, TYPE_OPERAND_FMT, "function");
8365 putc ('\n', file);
8366 }
8367 extern_func_head = extern_func_head->next;
8368 }
8369 }
8370
8371
8372 /* Switch to the section to which we should output X. The only thing
8373 special we do here is to honor small data. */
8374
8375 static void
8376 ia64_select_rtx_section (mode, x, align)
8377 enum machine_mode mode;
8378 rtx x;
8379 unsigned HOST_WIDE_INT align;
8380 {
8381 if (GET_MODE_SIZE (mode) > 0
8382 && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8383 sdata_section ();
8384 else
8385 default_elf_select_rtx_section (mode, x, align);
8386 }
8387
8388 /* It is illegal to have relocations in shared segments on AIX and HPUX.
8389 Pretend flag_pic is always set. */
8390
8391 static void
8392 ia64_rwreloc_select_section (exp, reloc, align)
8393 tree exp;
8394 int reloc;
8395 unsigned HOST_WIDE_INT align;
8396 {
8397 default_elf_select_section_1 (exp, reloc, align, true);
8398 }
8399
8400 static void
8401 ia64_rwreloc_unique_section (decl, reloc)
8402 tree decl;
8403 int reloc;
8404 {
8405 default_unique_section_1 (decl, reloc, true);
8406 }
8407
8408 static void
8409 ia64_rwreloc_select_rtx_section (mode, x, align)
8410 enum machine_mode mode;
8411 rtx x;
8412 unsigned HOST_WIDE_INT align;
8413 {
8414 int save_pic = flag_pic;
8415 flag_pic = 1;
8416 ia64_select_rtx_section (mode, x, align);
8417 flag_pic = save_pic;
8418 }
8419
8420 static unsigned int
8421 ia64_rwreloc_section_type_flags (decl, name, reloc)
8422 tree decl;
8423 const char *name;
8424 int reloc;
8425 {
8426 return default_section_type_flags_1 (decl, name, reloc, true);
8427 }
8428
8429
8430 /* Output the assembler code for a thunk function. THUNK_DECL is the
8431 declaration for the thunk function itself, FUNCTION is the decl for
8432 the target function. DELTA is an immediate constant offset to be
8433 added to THIS. If VCALL_OFFSET is non-zero, the word at
8434 *(*this + vcall_offset) should be added to THIS. */
8435
8436 static void
8437 ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
8438 FILE *file;
8439 tree thunk ATTRIBUTE_UNUSED;
8440 HOST_WIDE_INT delta;
8441 HOST_WIDE_INT vcall_offset;
8442 tree function;
8443 {
8444 rtx this, insn, funexp;
8445
8446 reload_completed = 1;
8447 no_new_pseudos = 1;
8448
8449 /* Set things up as ia64_expand_prologue might. */
8450 last_scratch_gr_reg = 15;
8451
8452 memset (&current_frame_info, 0, sizeof (current_frame_info));
8453 current_frame_info.spill_cfa_off = -16;
8454 current_frame_info.n_input_regs = 1;
8455 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8456
8457 if (!TARGET_REG_NAMES)
8458 reg_names[IN_REG (0)] = ia64_reg_numbers[0];
8459
8460 /* Mark the end of the (empty) prologue. */
8461 emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8462
8463 this = gen_rtx_REG (Pmode, IN_REG (0));
8464 if (TARGET_ILP32)
8465 emit_insn (gen_ptr_extend (this,
8466 gen_rtx_REG (ptr_mode, IN_REG (0))));
8467
8468 /* Apply the constant offset, if required. */
8469 if (delta)
8470 {
8471 rtx delta_rtx = GEN_INT (delta);
8472
8473 if (!CONST_OK_FOR_I (delta))
8474 {
8475 rtx tmp = gen_rtx_REG (Pmode, 2);
8476 emit_move_insn (tmp, delta_rtx);
8477 delta_rtx = tmp;
8478 }
8479 emit_insn (gen_adddi3 (this, this, delta_rtx));
8480 }
8481
8482 /* Apply the offset from the vtable, if required. */
8483 if (vcall_offset)
8484 {
8485 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8486 rtx tmp = gen_rtx_REG (Pmode, 2);
8487
8488 if (TARGET_ILP32)
8489 {
8490 rtx t = gen_rtx_REG (ptr_mode, 2);
8491 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8492 emit_insn (gen_ptr_extend (tmp, t));
8493 }
8494 else
8495 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8496
8497 if (!CONST_OK_FOR_J (vcall_offset))
8498 {
8499 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8500 emit_move_insn (tmp2, vcall_offset_rtx);
8501 vcall_offset_rtx = tmp2;
8502 }
8503 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8504
8505 if (TARGET_ILP32)
8506 emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8507 gen_rtx_MEM (ptr_mode, tmp));
8508 else
8509 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8510
8511 emit_insn (gen_adddi3 (this, this, tmp));
8512 }
8513
8514 /* Generate a tail call to the target function. */
8515 if (! TREE_USED (function))
8516 {
8517 assemble_external (function);
8518 TREE_USED (function) = 1;
8519 }
8520 funexp = XEXP (DECL_RTL (function), 0);
8521 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8522 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8523 insn = get_last_insn ();
8524 SIBLING_CALL_P (insn) = 1;
8525
8526 /* Code generation for calls relies on splitting. */
8527 reload_completed = 1;
8528 try_split (PATTERN (insn), insn, 0);
8529
8530 emit_barrier ();
8531
8532 /* Run just enough of rest_of_compilation to get the insns emitted.
8533 There's not really enough bulk here to make other passes such as
8534 instruction scheduling worth while. Note that use_thunk calls
8535 assemble_start_function and assemble_end_function. */
8536
8537 insn = get_insns ();
8538 emit_all_insn_group_barriers (NULL, insn);
8539 shorten_branches (insn);
8540 final_start_function (insn, file, 1);
8541 final (insn, file, 1, 0);
8542 final_end_function ();
8543
8544 reload_completed = 0;
8545 no_new_pseudos = 0;
8546 }
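/* Roughly, in C terms, the thunk built above computes

     this += delta;
     if (vcall_offset)
       this += *(long *) (*(char **) this + vcall_offset);

   before tail-calling FUNCTION with the adjusted `this' still in in0
   (the load and adds are really the Pmode move/adddi3 sequences emitted
   above).  */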
8547
8548 #include "gt-ia64.h"
8549