/* Subroutines used for code generation on the DEC Alpha.
   Copyright (C) 1992-2013 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "obstack.h"
#include "except.h"
#include "function.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "debug.h"
#include "langhooks.h"
#include "splay-tree.h"
#include "gimple.h"
#include "tree-flow.h"
#include "tree-stdarg.h"
#include "tm-constrs.h"
#include "df.h"
#include "libfuncs.h"
#include "opts.h"
#include "params.h"

/* Specify which cpu to schedule for.  */
enum processor_type alpha_tune;

/* Which cpu we're generating code for.  */
enum processor_type alpha_cpu;

static const char * const alpha_cpu_name[] =
{
  "ev4", "ev5", "ev6"
};

/* Specify how accurate floating-point traps need to be.  */

enum alpha_trap_precision alpha_tp;

/* Specify the floating-point rounding mode.  */

enum alpha_fp_rounding_mode alpha_fprm;

/* Specify which things cause traps.  */

enum alpha_fp_trap_mode alpha_fptm;

/* Nonzero if inside of a function, because the Alpha asm can't
   handle .files inside of functions.  */

static int inside_function = FALSE;

/* The number of cycles of latency we should assume on memory reads.  */

int alpha_memory_latency = 3;

/* Whether the function needs the GP.  */

static int alpha_function_needs_gp;

/* The assembler name of the current function.  */

static const char *alpha_fnname;

/* The next explicit relocation sequence number.  */
extern GTY(()) int alpha_next_sequence_number;
int alpha_next_sequence_number = 1;

/* The literal and gpdisp sequence numbers for this insn, as printed
   by %# and %* respectively.  */
extern GTY(()) int alpha_this_literal_sequence_number;
extern GTY(()) int alpha_this_gpdisp_sequence_number;
int alpha_this_literal_sequence_number;
int alpha_this_gpdisp_sequence_number;

/* Costs of various operations on the different architectures.  */

struct alpha_rtx_cost_data
{
  unsigned char fp_add;
  unsigned char fp_mult;
  unsigned char fp_div_sf;
  unsigned char fp_div_df;
  unsigned char int_mult_si;
  unsigned char int_mult_di;
  unsigned char int_shift;
  unsigned char int_cmov;
  unsigned short int_div;
};

static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
{
  { /* EV4 */
    COSTS_N_INSNS (6),		/* fp_add */
    COSTS_N_INSNS (6),		/* fp_mult */
    COSTS_N_INSNS (34),		/* fp_div_sf */
    COSTS_N_INSNS (63),		/* fp_div_df */
    COSTS_N_INSNS (23),		/* int_mult_si */
    COSTS_N_INSNS (23),		/* int_mult_di */
    COSTS_N_INSNS (2),		/* int_shift */
    COSTS_N_INSNS (2),		/* int_cmov */
    COSTS_N_INSNS (97),		/* int_div */
  },
  { /* EV5 */
    COSTS_N_INSNS (4),		/* fp_add */
    COSTS_N_INSNS (4),		/* fp_mult */
    COSTS_N_INSNS (15),		/* fp_div_sf */
    COSTS_N_INSNS (22),		/* fp_div_df */
    COSTS_N_INSNS (8),		/* int_mult_si */
    COSTS_N_INSNS (12),		/* int_mult_di */
    COSTS_N_INSNS (1) + 1,	/* int_shift */
    COSTS_N_INSNS (1),		/* int_cmov */
    COSTS_N_INSNS (83),		/* int_div */
  },
  { /* EV6 */
    COSTS_N_INSNS (4),		/* fp_add */
    COSTS_N_INSNS (4),		/* fp_mult */
    COSTS_N_INSNS (12),		/* fp_div_sf */
    COSTS_N_INSNS (15),		/* fp_div_df */
    COSTS_N_INSNS (7),		/* int_mult_si */
    COSTS_N_INSNS (7),		/* int_mult_di */
    COSTS_N_INSNS (1),		/* int_shift */
    COSTS_N_INSNS (2),		/* int_cmov */
    COSTS_N_INSNS (86),		/* int_div */
  },
};

/* Similar but tuned for code size instead of execution latency.  The
   extra +N is fractional cost tuning based on latency.  It's used to
   encourage use of cheaper insns like shift, but only if there's just
   one of them.  */

static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
{
  COSTS_N_INSNS (1),		/* fp_add */
  COSTS_N_INSNS (1),		/* fp_mult */
  COSTS_N_INSNS (1),		/* fp_div_sf */
  COSTS_N_INSNS (1) + 1,	/* fp_div_df */
  COSTS_N_INSNS (1) + 1,	/* int_mult_si */
  COSTS_N_INSNS (1) + 2,	/* int_mult_di */
  COSTS_N_INSNS (1),		/* int_shift */
  COSTS_N_INSNS (1),		/* int_cmov */
  COSTS_N_INSNS (6),		/* int_div */
};

/* Get the number of args of a function in one of two ways.  */
#if TARGET_ABI_OPEN_VMS
#define NUM_ARGS crtl->args.info.num_args
#else
#define NUM_ARGS crtl->args.info
#endif

#define REG_PV 27
#define REG_RA 26

/* Declarations of static functions.  */
static struct machine_function *alpha_init_machine_status (void);
static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);

#if TARGET_ABI_OPEN_VMS
static void alpha_write_linkage (FILE *, const char *);
static bool vms_valid_pointer_mode (enum machine_mode);
#else
#define vms_patch_builtins()  gcc_unreachable()
#endif

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
alpha_mangle_type (const_tree type)
{
  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_LONG_DOUBLE_128)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
#endif
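
/* As an illustration (relying on the Itanium C++ ABI encodings, which
   are not spelled out here): with a 128-bit long double, a function
   such as "void f (long double)" mangles as _Z1fg (the __float128
   code "g") instead of the default _Z1fe.  */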

/* Parse target option strings.  */

static void
alpha_option_override (void)
{
  static const struct cpu_table {
    const char *const name;
    const enum processor_type processor;
    const int flags;
    const unsigned short line_size; /* in bytes */
    const unsigned short l1_size;   /* in kb.  */
    const unsigned short l2_size;   /* in kb.  */
  } cpu_table[] = {
    /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
       EV4/EV45 had 128k to 16M 32-byte direct Bcache.  LCA45
       had 64k to 8M 8-byte direct Bcache.  */
    { "ev4",	PROCESSOR_EV4, 0, 32, 8, 8*1024 },
    { "21064",	PROCESSOR_EV4, 0, 32, 8, 8*1024 },
    { "ev45",	PROCESSOR_EV4, 0, 32, 16, 16*1024 },

    /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
       and 1M to 16M 64 byte L3 (not modeled).
       PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
       PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache.  */
    { "ev5",	PROCESSOR_EV5, 0, 32, 8, 96 },
    { "21164",	PROCESSOR_EV5, 0, 32, 8, 96 },
    { "ev56",	PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
    { "21164a",	PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
    { "pca56",	PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
    { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
    { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },

    /* EV6 had 64k 64 byte L1, 1M to 16M Bcache.  */
    { "ev6",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
    { "21264",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
    { "ev67",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
      64, 64, 16*1024 },
    { "21264a",	PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
      64, 64, 16*1024 }
  };

  int const ct_size = ARRAY_SIZE (cpu_table);
  int line_size = 0, l1_size = 0, l2_size = 0;
  int i;

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Default to full IEEE compliance mode for Go language.  */
  if (strcmp (lang_hooks.name, "GNU Go") == 0
      && !(target_flags_explicit & MASK_IEEE))
    target_flags |= MASK_IEEE;

  alpha_fprm = ALPHA_FPRM_NORM;
  alpha_tp = ALPHA_TP_PROG;
  alpha_fptm = ALPHA_FPTM_N;

  if (TARGET_IEEE)
    {
      alpha_tp = ALPHA_TP_INSN;
      alpha_fptm = ALPHA_FPTM_SU;
    }
  if (TARGET_IEEE_WITH_INEXACT)
    {
      alpha_tp = ALPHA_TP_INSN;
      alpha_fptm = ALPHA_FPTM_SUI;
    }

  if (alpha_tp_string)
    {
      if (! strcmp (alpha_tp_string, "p"))
	alpha_tp = ALPHA_TP_PROG;
      else if (! strcmp (alpha_tp_string, "f"))
	alpha_tp = ALPHA_TP_FUNC;
      else if (! strcmp (alpha_tp_string, "i"))
	alpha_tp = ALPHA_TP_INSN;
      else
	error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
    }

  if (alpha_fprm_string)
    {
      if (! strcmp (alpha_fprm_string, "n"))
	alpha_fprm = ALPHA_FPRM_NORM;
      else if (! strcmp (alpha_fprm_string, "m"))
	alpha_fprm = ALPHA_FPRM_MINF;
      else if (! strcmp (alpha_fprm_string, "c"))
	alpha_fprm = ALPHA_FPRM_CHOP;
      else if (! strcmp (alpha_fprm_string, "d"))
	alpha_fprm = ALPHA_FPRM_DYN;
      else
	error ("bad value %qs for -mfp-rounding-mode switch",
	       alpha_fprm_string);
    }

  if (alpha_fptm_string)
    {
      if (strcmp (alpha_fptm_string, "n") == 0)
	alpha_fptm = ALPHA_FPTM_N;
      else if (strcmp (alpha_fptm_string, "u") == 0)
	alpha_fptm = ALPHA_FPTM_U;
      else if (strcmp (alpha_fptm_string, "su") == 0)
	alpha_fptm = ALPHA_FPTM_SU;
      else if (strcmp (alpha_fptm_string, "sui") == 0)
	alpha_fptm = ALPHA_FPTM_SUI;
      else
	error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
    }

  if (alpha_cpu_string)
    {
      for (i = 0; i < ct_size; i++)
	if (! strcmp (alpha_cpu_string, cpu_table [i].name))
	  {
	    alpha_tune = alpha_cpu = cpu_table[i].processor;
	    line_size = cpu_table[i].line_size;
	    l1_size = cpu_table[i].l1_size;
	    l2_size = cpu_table[i].l2_size;
	    target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
	    target_flags |= cpu_table[i].flags;
	    break;
	  }
      if (i == ct_size)
	error ("bad value %qs for -mcpu switch", alpha_cpu_string);
    }

  if (alpha_tune_string)
    {
      for (i = 0; i < ct_size; i++)
	if (! strcmp (alpha_tune_string, cpu_table [i].name))
	  {
	    alpha_tune = cpu_table[i].processor;
	    line_size = cpu_table[i].line_size;
	    l1_size = cpu_table[i].l1_size;
	    l2_size = cpu_table[i].l2_size;
	    break;
	  }
      if (i == ct_size)
	error ("bad value %qs for -mtune switch", alpha_tune_string);
    }

  if (line_size)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (l1_size)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (l2_size)
    maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  /* Do some sanity checks on the above options.  */

  if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
      && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
    {
      warning (0, "fp software completion requires -mtrap-precision=i");
      alpha_tp = ALPHA_TP_INSN;
    }

  if (alpha_cpu == PROCESSOR_EV6)
    {
      /* Except for EV6 pass 1 (not released), we always have precise
	 arithmetic traps.  Which means we can do software completion
	 without minding trap shadows.  */
      alpha_tp = ALPHA_TP_PROG;
    }

  if (TARGET_FLOAT_VAX)
    {
      if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
	{
	  warning (0, "rounding mode not supported for VAX floats");
	  alpha_fprm = ALPHA_FPRM_NORM;
	}
      if (alpha_fptm == ALPHA_FPTM_SUI)
	{
	  warning (0, "trap mode not supported for VAX floats");
	  alpha_fptm = ALPHA_FPTM_SU;
	}
      if (target_flags_explicit & MASK_LONG_DOUBLE_128)
	warning (0, "128-bit long double not supported for VAX floats");
      target_flags &= ~MASK_LONG_DOUBLE_128;
    }

  {
    char *end;
    int lat;

    if (!alpha_mlat_string)
      alpha_mlat_string = "L1";

    if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
	&& (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
      ;
    else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
	     && ISDIGIT ((unsigned char)alpha_mlat_string[1])
	     && alpha_mlat_string[2] == '\0')
      {
	static int const cache_latency[][4] =
	{
	  { 3, 30, -1 },	/* ev4 -- Bcache is a guess */
	  { 2, 12, 38 },	/* ev5 -- Bcache from PC164 LMbench numbers */
	  { 3, 12, 30 },	/* ev6 -- Bcache from DS20 LMbench.  */
	};

	lat = alpha_mlat_string[1] - '0';
	if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
	  {
	    warning (0, "L%d cache latency unknown for %s",
		     lat, alpha_cpu_name[alpha_tune]);
	    lat = 3;
	  }
	else
	  lat = cache_latency[alpha_tune][lat-1];
      }
    else if (! strcmp (alpha_mlat_string, "main"))
      {
	/* Most current memories have about 370ns latency.  This is
	   a reasonable guess for a fast cpu.  */
	lat = 150;
      }
    else
      {
	warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
	lat = 3;
      }

    alpha_memory_latency = lat;
  }

  /* Default the definition of "small data" to 8 bytes.  */
  if (!global_options_set.x_g_switch_value)
    g_switch_value = 8;

  /* Infer TARGET_SMALL_DATA from -fpic/-fPIC.  */
  if (flag_pic == 1)
    target_flags |= MASK_SMALL_DATA;
  else if (flag_pic == 2)
    target_flags &= ~MASK_SMALL_DATA;

  /* Align labels and loops for optimal branching.  */
  /* ??? Kludge these by not doing anything if we don't optimize.  */
  if (optimize > 0)
    {
      if (align_loops <= 0)
	align_loops = 16;
      if (align_jumps <= 0)
	align_jumps = 16;
    }
  if (align_functions <= 0)
    align_functions = 16;

  /* Register variables and functions with the garbage collector.  */

  /* Set up function hooks.  */
  init_machine_status = alpha_init_machine_status;

  /* Tell the compiler when we're using VAX floating point.  */
  if (TARGET_FLOAT_VAX)
    {
      REAL_MODE_FORMAT (SFmode) = &vax_f_format;
      REAL_MODE_FORMAT (DFmode) = &vax_g_format;
      REAL_MODE_FORMAT (TFmode) = NULL;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif
}

/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones.  */

int
zap_mask (HOST_WIDE_INT value)
{
  int i;

  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
       i++, value >>= 8)
    if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
      return 0;

  return 1;
}
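
/* For example, 0x00000000ffff0000 is made up solely of 0x00 and 0xff
   bytes and is accepted as a zap mask, whereas 0x00000000ffff1200
   contains the byte 0x12 and is rejected.  */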

/* Return true if OP is valid for a particular TLS relocation.
   We are already guaranteed that OP is a CONST.  */

int
tls_symbolic_operand_1 (rtx op, int size, int unspec)
{
  op = XEXP (op, 0);

  if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
    return 0;
  op = XVECEXP (op, 0, 0);

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  switch (SYMBOL_REF_TLS_MODEL (op))
    {
    case TLS_MODEL_LOCAL_DYNAMIC:
      return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
    case TLS_MODEL_INITIAL_EXEC:
      return unspec == UNSPEC_TPREL && size == 64;
    case TLS_MODEL_LOCAL_EXEC:
      return unspec == UNSPEC_TPREL && size == alpha_tls_size;
    default:
      gcc_unreachable ();
    }
}

/* Used by aligned_memory_operand and unaligned_memory_operand to
   resolve what reload is going to do with OP if it's a register.  */

rtx
resolve_reload_operand (rtx op)
{
  if (reload_in_progress)
    {
      rtx tmp = op;
      if (GET_CODE (tmp) == SUBREG)
	tmp = SUBREG_REG (tmp);
      if (REG_P (tmp)
	  && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
	{
	  op = reg_equiv_memory_loc (REGNO (tmp));
	  if (op == 0)
	    return 0;
	}
    }
  return op;
}

/* The scalar modes supported differs from the default check-what-c-supports
   version in that sometimes TFmode is available even when long double
   indicates only DFmode.  */

static bool
alpha_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode: /* via optabs.c */
      return true;

    case SFmode:
    case DFmode:
      return true;

    case TFmode:
      return TARGET_HAS_XFLOATING_LIBS;

    default:
      return false;
    }
}

/* Alpha implements a couple of integer vector mode operations when
   TARGET_MAX is enabled.  We do not check TARGET_MAX here, however,
   which allows the vectorizer to operate on e.g. move instructions,
   or when expand_vector_operations can do something useful.  */

static bool
alpha_vector_mode_supported_p (enum machine_mode mode)
{
  return mode == V8QImode || mode == V4HImode || mode == V2SImode;
}

/* Return 1 if this function can directly return via $26.  */

int
direct_return (void)
{
  return (TARGET_ABI_OSF
	  && reload_completed
	  && alpha_sa_size () == 0
	  && get_frame_size () == 0
	  && crtl->outgoing_args_size == 0
	  && crtl->args.pretend_args_size == 0);
}

/* Return the TLS model to use for SYMBOL.  */

static enum tls_model
tls_symbolic_operand_type (rtx symbol)
{
  enum tls_model model;

  if (GET_CODE (symbol) != SYMBOL_REF)
    return TLS_MODEL_NONE;
  model = SYMBOL_REF_TLS_MODEL (symbol);

  /* Local-exec with a 64-bit size is the same code as initial-exec.  */
  if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
    model = TLS_MODEL_INITIAL_EXEC;

  return model;
}

/* Return true if the function DECL will share the same GP as any
   function in the current unit of translation.  */

static bool
decl_has_samegp (const_tree decl)
{
  /* Functions that are not local can be overridden, and thus may
     not share the same gp.  */
  if (!(*targetm.binds_local_p) (decl))
    return false;

  /* If -msmall-data is in effect, assume that there is only one GP
     for the module, and so any local symbol has this property.  We
     need explicit relocations to be able to enforce this for symbols
     not defined in this unit of translation, however.  */
  if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
    return true;

  /* Functions that are not external are defined in this UoT.  */
  /* ??? Irritatingly, static functions not yet emitted are still
     marked "external".  Apply this to non-static functions only.  */
  return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
}

/* Return true if EXP should be placed in the small data section.  */

static bool
alpha_in_small_data_p (const_tree exp)
{
  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  Duh.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".sdata") == 0
	  || strcmp (section, ".sbss") == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= g_switch_value)
	return true;
    }

  return false;
}
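
/* For instance, with the default 8-byte g_switch_value set in
   alpha_option_override above, a 4-byte "static int" is placed in
   .sdata/.sbss and can be reached with a single 16-bit $gp-relative
   displacement, while a 400-byte array stays in the normal data
   sections.  */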

#if TARGET_ABI_OPEN_VMS
static bool
vms_valid_pointer_mode (enum machine_mode mode)
{
  return (mode == SImode || mode == DImode);
}

static bool
alpha_linkage_symbol_p (const char *symname)
{
  int symlen = strlen (symname);

  if (symlen > 4)
    return strcmp (&symname [symlen - 4], "..lk") == 0;

  return false;
}

#define LINKAGE_SYMBOL_REF_P(X) \
  ((GET_CODE (X) == SYMBOL_REF \
    && alpha_linkage_symbol_p (XSTR (X, 0))) \
   || (GET_CODE (X) == CONST \
       && GET_CODE (XEXP (X, 0)) == PLUS \
       && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
       && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
#endif

/* legitimate_address_p recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   For Alpha, we have either a constant address or the sum of a
   register and a constant address, or just a register.  For DImode,
   any of those forms can be surrounded with an AND that clears the
   low-order three bits; this is an "unaligned" access.  */

static bool
alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
{
  /* If this is an ldq_u type address, discard the outer AND.  */
  if (mode == DImode
      && GET_CODE (x) == AND
      && CONST_INT_P (XEXP (x, 1))
      && INTVAL (XEXP (x, 1)) == -8)
    x = XEXP (x, 0);

  /* Discard non-paradoxical subregs.  */
  if (GET_CODE (x) == SUBREG
      && (GET_MODE_SIZE (GET_MODE (x))
	  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
    x = SUBREG_REG (x);

  /* Unadorned general registers are valid.  */
  if (REG_P (x)
      && (strict
	  ? STRICT_REG_OK_FOR_BASE_P (x)
	  : NONSTRICT_REG_OK_FOR_BASE_P (x)))
    return true;

  /* Constant addresses (i.e. +/- 32k) are valid.  */
  if (CONSTANT_ADDRESS_P (x))
    return true;

#if TARGET_ABI_OPEN_VMS
  if (LINKAGE_SYMBOL_REF_P (x))
    return true;
#endif

  /* Register plus a small constant offset is valid.  */
  if (GET_CODE (x) == PLUS)
    {
      rtx ofs = XEXP (x, 1);
      x = XEXP (x, 0);

      /* Discard non-paradoxical subregs.  */
      if (GET_CODE (x) == SUBREG
	  && (GET_MODE_SIZE (GET_MODE (x))
	      < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
	x = SUBREG_REG (x);

      if (REG_P (x))
	{
	  if (! strict
	      && NONSTRICT_REG_OK_FP_BASE_P (x)
	      && CONST_INT_P (ofs))
	    return true;
	  if ((strict
	       ? STRICT_REG_OK_FOR_BASE_P (x)
	       : NONSTRICT_REG_OK_FOR_BASE_P (x))
	      && CONSTANT_ADDRESS_P (ofs))
	    return true;
	}
    }

  /* If we're managing explicit relocations, LO_SUM is valid, as are small
     data symbols.  Avoid explicit relocations of modes larger than word
     mode since i.e. $LC0+8($1) can fold around +/- 32k offset.  */
  else if (TARGET_EXPLICIT_RELOCS
	   && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
    {
      if (small_symbolic_operand (x, Pmode))
	return true;

      if (GET_CODE (x) == LO_SUM)
	{
	  rtx ofs = XEXP (x, 1);
	  x = XEXP (x, 0);

	  /* Discard non-paradoxical subregs.  */
	  if (GET_CODE (x) == SUBREG
	      && (GET_MODE_SIZE (GET_MODE (x))
		  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
	    x = SUBREG_REG (x);

	  /* Must have a valid base register.  */
	  if (! (REG_P (x)
		 && (strict
		     ? STRICT_REG_OK_FOR_BASE_P (x)
		     : NONSTRICT_REG_OK_FOR_BASE_P (x))))
	    return false;

	  /* The symbol must be local.  */
	  if (local_symbolic_operand (ofs, Pmode)
	      || dtp32_symbolic_operand (ofs, Pmode)
	      || tp32_symbolic_operand (ofs, Pmode))
	    return true;
	}
    }

  return false;
}
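
/* Illustrative examples of addresses the function above accepts, using
   hard register 16 as the base:

     (reg:DI 16)                                plain register
     (plus:DI (reg:DI 16) (const_int 64))       register + 16-bit offset
     (and:DI (plus:DI (reg:DI 16) (const_int 5))
	     (const_int -8))                    ldq_u-style DImode access

   With explicit relocations in effect, (lo_sum (reg) (symbol)) of a
   local symbol is also accepted for accesses no wider than a word.  */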

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */

static rtx
alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode)
{
  HOST_WIDE_INT addend;

  /* If the address is (plus reg const_int) and the CONST_INT is not a
     valid offset, compute the high part of the constant and add it to
     the register.  Then our address is (plus temp low-part-const).  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
    {
      addend = INTVAL (XEXP (x, 1));
      x = XEXP (x, 0);
      goto split_addend;
    }

  /* If the address is (const (plus FOO const_int)), find the low-order
     part of the CONST_INT.  Then load FOO plus any high-order part of the
     CONST_INT into a register.  Our address is (plus reg low-part-const).
     This is done to reduce the number of GOT entries.  */
  if (can_create_pseudo_p ()
      && GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      addend = INTVAL (XEXP (XEXP (x, 0), 1));
      x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
      goto split_addend;
    }

  /* If we have a (plus reg const), emit the load as in (2), then add
     the two registers, and finally generate (plus reg low-part-const) as
     our address.  */
  if (can_create_pseudo_p ()
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && GET_CODE (XEXP (x, 1)) == CONST
      && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
      && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
    {
      addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
      x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
			       XEXP (XEXP (XEXP (x, 1), 0), 0),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      goto split_addend;
    }

  /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
     Avoid modes larger than word mode since i.e. $LC0+8($1) can fold
     around +/- 32k offset.  */
  if (TARGET_EXPLICIT_RELOCS
      && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
      && symbolic_operand (x, Pmode))
    {
      rtx r0, r16, eqv, tga, tp, insn, dest, seq;

      switch (tls_symbolic_operand_type (x))
	{
	case TLS_MODEL_NONE:
	  break;

	case TLS_MODEL_GLOBAL_DYNAMIC:
	  start_sequence ();

	  r0 = gen_rtx_REG (Pmode, 0);
	  r16 = gen_rtx_REG (Pmode, 16);
	  tga = get_tls_get_addr ();
	  dest = gen_reg_rtx (Pmode);
	  seq = GEN_INT (alpha_next_sequence_number++);

	  emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
	  insn = gen_call_value_osf_tlsgd (r0, tga, seq);
	  insn = emit_call_insn (insn);
	  RTL_CONST_CALL_P (insn) = 1;
	  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);

	  insn = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insn, dest, r0, x);
	  return dest;

	case TLS_MODEL_LOCAL_DYNAMIC:
	  start_sequence ();

	  r0 = gen_rtx_REG (Pmode, 0);
	  r16 = gen_rtx_REG (Pmode, 16);
	  tga = get_tls_get_addr ();
	  scratch = gen_reg_rtx (Pmode);
	  seq = GEN_INT (alpha_next_sequence_number++);

	  emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
	  insn = gen_call_value_osf_tlsldm (r0, tga, seq);
	  insn = emit_call_insn (insn);
	  RTL_CONST_CALL_P (insn) = 1;
	  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);

	  insn = get_insns ();
	  end_sequence ();

	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_TLSLDM_CALL);
	  emit_libcall_block (insn, scratch, r0, eqv);

	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
	  eqv = gen_rtx_CONST (Pmode, eqv);

	  if (alpha_tls_size == 64)
	    {
	      dest = gen_reg_rtx (Pmode);
	      emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
	      emit_insn (gen_adddi3 (dest, dest, scratch));
	      return dest;
	    }
	  if (alpha_tls_size == 32)
	    {
	      insn = gen_rtx_HIGH (Pmode, eqv);
	      insn = gen_rtx_PLUS (Pmode, scratch, insn);
	      scratch = gen_reg_rtx (Pmode);
	      emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
	    }
	  return gen_rtx_LO_SUM (Pmode, scratch, eqv);

	case TLS_MODEL_INITIAL_EXEC:
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
	  eqv = gen_rtx_CONST (Pmode, eqv);
	  tp = gen_reg_rtx (Pmode);
	  scratch = gen_reg_rtx (Pmode);
	  dest = gen_reg_rtx (Pmode);

	  emit_insn (gen_get_thread_pointerdi (tp));
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
	  emit_insn (gen_adddi3 (dest, tp, scratch));
	  return dest;

	case TLS_MODEL_LOCAL_EXEC:
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
	  eqv = gen_rtx_CONST (Pmode, eqv);
	  tp = gen_reg_rtx (Pmode);

	  emit_insn (gen_get_thread_pointerdi (tp));
	  if (alpha_tls_size == 32)
	    {
	      insn = gen_rtx_HIGH (Pmode, eqv);
	      insn = gen_rtx_PLUS (Pmode, tp, insn);
	      tp = gen_reg_rtx (Pmode);
	      emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
	    }
	  return gen_rtx_LO_SUM (Pmode, tp, eqv);

	default:
	  gcc_unreachable ();
	}

      if (local_symbolic_operand (x, Pmode))
	{
	  if (small_symbolic_operand (x, Pmode))
	    return x;
	  else
	    {
	      if (can_create_pseudo_p ())
		scratch = gen_reg_rtx (Pmode);
	      emit_insn (gen_rtx_SET (VOIDmode, scratch,
				      gen_rtx_HIGH (Pmode, x)));
	      return gen_rtx_LO_SUM (Pmode, scratch, x);
	    }
	}
    }

  return NULL;

 split_addend:
  {
    HOST_WIDE_INT low, high;

    low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
    addend -= low;
    high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
    addend -= high;

    if (addend)
      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
			       (!can_create_pseudo_p () ? scratch : NULL_RTX),
			       1, OPTAB_LIB_WIDEN);
    if (high)
      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
			       (!can_create_pseudo_p () ? scratch : NULL_RTX),
			       1, OPTAB_LIB_WIDEN);

    return plus_constant (Pmode, x, low);
  }
}


/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  Return X or the new, valid address.  */

static rtx
alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  enum machine_mode mode)
{
  rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
  return new_x ? new_x : x;
}

/* Return true if ADDR has an effect that depends on the machine mode it
   is used for.  On the Alpha this is true only for the unaligned modes.
   We can simplify the test since we know that the address must be valid.  */

static bool
alpha_mode_dependent_address_p (const_rtx addr,
				addr_space_t as ATTRIBUTE_UNUSED)
{
  return GET_CODE (addr) == AND;
}

/* Primarily this is required for TLS symbols, but given that our move
   patterns *ought* to be able to handle any symbol at any time, we
   should never be spilling symbolic operands to the constant pool, ever.  */

static bool
alpha_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum rtx_code code = GET_CODE (x);
  return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
}

/* We do not allow indirect calls to be optimized into sibling calls, nor
   can we allow a call to a function with a different GP to be optimized
   into a sibcall.  */

static bool
alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Can't do indirect tail calls, since we don't know if the target
     uses the same GP.  */
  if (!decl)
    return false;

  /* Otherwise, we can make a tail call if the target function shares
     the same GP.  */
  return decl_has_samegp (decl);
}

int
some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  /* Don't re-split.  */
  if (GET_CODE (x) == LO_SUM)
    return -1;

  return small_symbolic_operand (x, Pmode) != 0;
}

static int
split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  /* Don't re-split.  */
  if (GET_CODE (x) == LO_SUM)
    return -1;

  if (small_symbolic_operand (x, Pmode))
    {
      x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
      *px = x;
      return -1;
    }

  return 0;
}

rtx
split_small_symbolic_operand (rtx x)
{
  x = copy_insn (x);
  for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
  return x;
}

/* Indicate that INSN cannot be duplicated.  This is true for any insn
   that we've marked with gpdisp relocs, since those have to stay in
   1-1 correspondence with one another.

   Technically we could copy them if we could set up a mapping from one
   sequence number to another, across the set of insns to be duplicated.
   This seems overly complicated and error-prone since interblock motion
   from sched-ebb could move one of the pair of insns to a different block.

   Also cannot allow jsr insns to be duplicated.  If they throw exceptions,
   then they'll be in a different block from their ldgp.  Which could lead
   the bb reorder code to think that it would be ok to copy just the block
   containing the call and branch to the block containing the ldgp.  */

static bool
alpha_cannot_copy_insn_p (rtx insn)
{
  if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
    return false;
  if (recog_memoized (insn) >= 0)
    return get_attr_cannot_copy (insn);
  else
    return false;
}


/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
alpha_legitimize_reload_address (rtx x,
				 enum machine_mode mode ATTRIBUTE_UNUSED,
				 int opnum, int type,
				 int ind_levels ATTRIBUTE_UNUSED)
{
  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by
     splitting the addend across an ldah and the mem insn.  This
     cuts number of extra insns needed from 3 to 1.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
      && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT high
	= (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;

      /* Check for 32-bit overflow.  */
      if (high + low != val)
	return NULL_RTX;

      /* Reload the high part into a base reg; leave the low part
	 in the mem directly.  */
      x = gen_rtx_PLUS (GET_MODE (x),
			gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
				      GEN_INT (high)),
			GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
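
/* Worked example of the high/low split above: a displacement of
   0x12345678 splits into high = 0x12340000 and low = 0x5678, so the
   high part is added with a single ldah (0x1234 << 16) and the 16-bit
   low part stays in the memory reference.  A displacement of 0x9000
   splits into high = 0x10000 and low = -0x7000, because the low part
   is a sign-extended 16-bit value.  */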

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
		 bool speed)
{
  enum machine_mode mode = GET_MODE (x);
  bool float_mode_p = FLOAT_MODE_P (mode);
  const struct alpha_rtx_cost_data *cost_data;

  if (!speed)
    cost_data = &alpha_rtx_cost_size;
  else
    cost_data = &alpha_rtx_cost_data[alpha_tune];

  switch (code)
    {
    case CONST_INT:
      /* If this is an 8-bit constant, return zero since it can be used
	 nearly anywhere with no cost.  If it is a valid operand for an
	 ADD or AND, likewise return 0 if we know it will be used in that
	 context.  Otherwise, return 2 since it might be used there later.
	 All other constants take at least two insns.  */
      if (INTVAL (x) >= 0 && INTVAL (x) < 256)
	{
	  *total = 0;
	  return true;
	}
      /* FALLTHRU */

    case CONST_DOUBLE:
      if (x == CONST0_RTX (mode))
	*total = 0;
      else if ((outer_code == PLUS && add_operand (x, VOIDmode))
	       || (outer_code == AND && and_operand (x, VOIDmode)))
	*total = 0;
      else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
	*total = 2;
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
	*total = COSTS_N_INSNS (outer_code != MEM);
      else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
	*total = COSTS_N_INSNS (1 + (outer_code != MEM));
      else if (tls_symbolic_operand_type (x))
	/* Estimate of cost for call_pal rduniq.  */
	/* ??? How many insns do we emit here?  More than one...  */
	*total = COSTS_N_INSNS (15);
      else
	/* Otherwise we do a load from the GOT.  */
	*total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
      return true;

    case HIGH:
      /* This is effectively an add_operand.  */
      *total = 2;
      return true;

    case PLUS:
    case MINUS:
      if (float_mode_p)
	*total = cost_data->fp_add;
      else if (GET_CODE (XEXP (x, 0)) == MULT
	       && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
	{
	  *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
			      (enum rtx_code) outer_code, opno, speed)
		    + rtx_cost (XEXP (x, 1),
				(enum rtx_code) outer_code, opno, speed)
		    + COSTS_N_INSNS (1));
	  return true;
	}
      return false;

    case MULT:
      if (float_mode_p)
	*total = cost_data->fp_mult;
      else if (mode == DImode)
	*total = cost_data->int_mult_di;
      else
	*total = cost_data->int_mult_si;
      return false;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && INTVAL (XEXP (x, 1)) <= 3)
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case ASHIFTRT:
    case LSHIFTRT:
      *total = cost_data->int_shift;
      return false;

    case IF_THEN_ELSE:
      if (float_mode_p)
	*total = cost_data->fp_add;
      else
	*total = cost_data->int_cmov;
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (!float_mode_p)
	*total = cost_data->int_div;
      else if (mode == SFmode)
	*total = cost_data->fp_div_sf;
      else
	*total = cost_data->fp_div_df;
      return false;

    case MEM:
      *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
      return true;

    case NEG:
      if (! float_mode_p)
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case ABS:
      if (! float_mode_p)
	{
	  *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
	  return false;
	}
      /* FALLTHRU */

    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_TRUNCATE:
      *total = cost_data->fp_add;
      return false;

    case FLOAT_EXTEND:
      if (MEM_P (XEXP (x, 0)))
	*total = 0;
      else
	*total = cost_data->fp_add;
      return false;

    default:
      return false;
    }
}

/* REF is an alignable memory location.  Place an aligned SImode
   reference into *PALIGNED_MEM and the number of bits to shift into
   *PBITNUM.  SCRATCH is a free register for use in reloading out
   of range stack slots.  */

void
get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
{
  rtx base;
  HOST_WIDE_INT disp, offset;

  gcc_assert (MEM_P (ref));

  if (reload_in_progress)
    {
      base = find_replacement (&XEXP (ref, 0));
      gcc_assert (memory_address_p (GET_MODE (ref), base));
    }
  else
    base = XEXP (ref, 0);

  if (GET_CODE (base) == PLUS)
    disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
  else
    disp = 0;

  /* Find the byte offset within an aligned word.  If the memory itself is
     claimed to be aligned, believe it.  Otherwise, aligned_memory_operand
     will have examined the base register and determined it is aligned, and
     thus displacements from it are naturally alignable.  */
  if (MEM_ALIGN (ref) >= 32)
    offset = 0;
  else
    offset = disp & 3;

  /* The location should not cross aligned word boundary.  */
  gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
	      <= GET_MODE_SIZE (SImode));

  /* Access the entire aligned word.  */
  *paligned_mem = widen_memory_access (ref, SImode, -offset);

  /* Convert the byte offset within the word to a bit offset.  */
  offset *= BITS_PER_UNIT;
  *pbitnum = GEN_INT (offset);
}
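
/* For example, given an HImode reference at (plus (reg) (const_int 6))
   with no alignment information, the byte offset within the aligned
   word is 2: *PALIGNED_MEM becomes the SImode word at displacement 4
   and *PBITNUM is 16, i.e. the halfword lives in bits 16..31 of the
   loaded word.  */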

/* Similar, but just get the address.  Handle the two reload cases.  */

rtx
get_unaligned_address (rtx ref)
{
  rtx base;
  HOST_WIDE_INT offset = 0;

  gcc_assert (MEM_P (ref));

  if (reload_in_progress)
    {
      base = find_replacement (&XEXP (ref, 0));
      gcc_assert (memory_address_p (GET_MODE (ref), base));
    }
  else
    base = XEXP (ref, 0);

  if (GET_CODE (base) == PLUS)
    offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);

  return plus_constant (Pmode, base, offset);
}

/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
   X is always returned in a register.  */

rtx
get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
{
  if (GET_CODE (addr) == PLUS)
    {
      ofs += INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
}

/* On the Alpha, all (non-symbolic) constants except zero go into
   a floating-point register via memory.  Note that we cannot
   return anything that is not a subset of RCLASS, and that some
   symbolic constants cannot be dropped to memory.  */

enum reg_class
alpha_preferred_reload_class (rtx x, enum reg_class rclass)
{
  /* Zero is present in any register class.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return rclass;

  /* These sorts of constants we can easily drop to memory.  */
  if (CONST_INT_P (x)
      || GET_CODE (x) == CONST_DOUBLE
      || GET_CODE (x) == CONST_VECTOR)
    {
      if (rclass == FLOAT_REGS)
	return NO_REGS;
      if (rclass == ALL_REGS)
	return GENERAL_REGS;
      return rclass;
    }

  /* All other kinds of constants should not (and in the case of HIGH
     cannot) be dropped to memory -- instead we use a GENERAL_REGS
     secondary reload.  */
  if (CONSTANT_P (x))
    return (rclass == ALL_REGS ? GENERAL_REGS : rclass);

  return rclass;
}

/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.  */

static reg_class_t
alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			enum machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Loading and storing HImode or QImode values to and from memory
     usually requires a scratch register.  */
  if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
    {
      if (any_memory_operand (x, mode))
	{
	  if (in_p)
	    {
	      if (!aligned_memory_operand (x, mode))
		sri->icode = direct_optab_handler (reload_in_optab, mode);
	    }
	  else
	    sri->icode = direct_optab_handler (reload_out_optab, mode);
	  return NO_REGS;
	}
    }

  /* We also cannot do integral arithmetic into FP regs, as might result
     from register elimination into a DImode fp register.  */
  if (rclass == FLOAT_REGS)
    {
      if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
	return GENERAL_REGS;
      if (in_p && INTEGRAL_MODE_P (mode)
	  && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
	return GENERAL_REGS;
    }

  return NO_REGS;
}

/* Subfunction of the following function.  Update the flags of any MEM
   found in part of X.  */

static int
alpha_set_memflags_1 (rtx *xp, void *data)
{
  rtx x = *xp, orig = (rtx) data;

  if (!MEM_P (x))
    return 0;

  MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
  MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
  MEM_READONLY_P (x) = MEM_READONLY_P (orig);

  /* Sadly, we cannot use alias sets because the extra aliasing
     produced by the AND interferes.  Given that two-byte quantities
     are the only thing we would be able to differentiate anyway,
     there does not seem to be any point in convoluting the early
     out of the alias check.  */

  return -1;
}

/* Given SEQ, which is an INSN list, look for any MEMs in either
   a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
   volatile flags from REF into each of the MEMs found.  If REF is not
   a MEM, don't do anything.  */

void
alpha_set_memflags (rtx seq, rtx ref)
{
  rtx insn;

  if (!MEM_P (ref))
    return;

  /* This is only called from alpha.md, after having had something
     generated from one of the insn patterns.  So if everything is
     zero, the pattern is already up-to-date.  */
  if (!MEM_VOLATILE_P (ref)
      && !MEM_NOTRAP_P (ref)
      && !MEM_READONLY_P (ref))
    return;

  for (insn = seq; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref);
    else
      gcc_unreachable ();
}

static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
				 int, bool);

/* Internal routine for alpha_emit_set_const to check for N or below insns.
   If NO_OUTPUT is true, then we only check to see if N insns are possible,
   and return pc_rtx if successful.  */

static rtx
alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
			HOST_WIDE_INT c, int n, bool no_output)
{
  HOST_WIDE_INT new_const;
  int i, bits;
  /* Use a pseudo if highly optimizing and still generating RTL.  */
  rtx subtarget
    = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
  rtx temp, insn;

  /* If this is a sign-extended 32-bit constant, we can do this in at most
     three insns, so do it if we have enough insns left.  We always have
     a sign-extended 32-bit constant when compiling on a narrow machine.  */

  if (HOST_BITS_PER_WIDE_INT != 64
      || c >> 31 == -1 || c >> 31 == 0)
    {
      HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT tmp1 = c - low;
      HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT extra = 0;

      /* If HIGH will be interpreted as negative but the constant is
	 positive, we must adjust it to do two ldah insns.  */

      if ((high & 0x8000) != 0 && c >= 0)
	{
	  extra = 0x4000;
	  tmp1 -= 0x40000000;
	  high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
	}

      if (c == low || (low == 0 && extra == 0))
	{
	  /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
	     but that meant that we can't handle INT_MIN on 32-bit machines
	     (like NT/Alpha), because we recurse indefinitely through
	     emit_move_insn to gen_movdi.  So instead, since we know exactly
	     what we want, create it explicitly.  */

	  if (no_output)
	    return pc_rtx;
	  if (target == NULL)
	    target = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
	  return target;
	}
      else if (n >= 2 + (extra != 0))
	{
	  if (no_output)
	    return pc_rtx;
	  if (!can_create_pseudo_p ())
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
	      temp = target;
	    }
	  else
	    temp = copy_to_suggested_reg (GEN_INT (high << 16),
					  subtarget, mode);

	  /* As of 2002-02-23, addsi3 is only available when not optimizing.
	     This means that if we go through expand_binop, we'll try to
	     generate extensions, etc, which will require new pseudos, which
	     will fail during some split phases.  The SImode add patterns
	     still exist, but are not named.  So build the insns by hand.  */

	  if (extra != 0)
	    {
	      if (!subtarget)
		subtarget = gen_reg_rtx (mode);
	      insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
	      insn = gen_rtx_SET (VOIDmode, subtarget, insn);
	      emit_insn (insn);
	      temp = subtarget;
	    }

	  if (target == NULL)
	    target = gen_reg_rtx (mode);
	  insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
	  insn = gen_rtx_SET (VOIDmode, target, insn);
	  emit_insn (insn);
	  return target;
	}
    }

  /* If we couldn't do it that way, try some other methods.  But if we have
     no instructions left, don't bother.  Likewise, if this is SImode and
     we can't make pseudos, we can't do anything since the expand_binop
     and expand_unop calls will widen and try to make pseudos.  */

  if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
    return 0;

  /* Next, see if we can load a related constant and then shift and possibly
     negate it to get the constant we want.  Try this once each increasing
     numbers of insns.  */

  for (i = 1; i < n; i++)
    {
      /* First, see if minus some low bits, we've an easy load of
	 high bits.  */

      new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
      if (new_const != 0)
	{
	  temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
	  if (temp)
	    {
	      if (no_output)
		return temp;
	      return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
				   target, 0, OPTAB_WIDEN);
	    }
	}

      /* Next try complementing.  */
      temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
      if (temp)
	{
	  if (no_output)
	    return temp;
	  return expand_unop (mode, one_cmpl_optab, temp, target, 0);
	}

      /* Next try to form a constant and do a left shift.  We can do this
	 if some low-order bits are zero; the exact_log2 call below tells
	 us that information.  The bits we are shifting out could be any
	 value, but here we'll just try the 0- and sign-extended forms of
	 the constant.  To try to increase the chance of having the same
	 constant in more than one insn, start at the highest number of
	 bits to shift, but try all possibilities in case a ZAPNOT will
	 be useful.  */

      bits = exact_log2 (c & -c);
      if (bits > 0)
	for (; bits > 0; bits--)
	  {
	    new_const = c >> bits;
	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
	    if (!temp && c < 0)
	      {
		new_const = (unsigned HOST_WIDE_INT)c >> bits;
		temp = alpha_emit_set_const (subtarget, mode, new_const,
					     i, no_output);
	      }
	    if (temp)
	      {
		if (no_output)
		  return temp;
		return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
				     target, 0, OPTAB_WIDEN);
	      }
	  }

      /* Now try high-order zero bits.  Here we try the shifted-in bits as
	 all zero and all ones.  Be careful to avoid shifting outside the
	 mode and to avoid shifting outside the host wide int size.  */
      /* On narrow hosts, don't shift a 1 into the high bit, since we'll
	 confuse the recursive call and set all of the high 32 bits.  */

      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
	      - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
      if (bits > 0)
	for (; bits > 0; bits--)
	  {
	    new_const = c << bits;
	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
	    if (!temp)
	      {
		new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
		temp = alpha_emit_set_const (subtarget, mode, new_const,
					     i, no_output);
	      }
	    if (temp)
	      {
		if (no_output)
		  return temp;
		return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
				     target, 1, OPTAB_WIDEN);
	      }
	  }

      /* Now try high-order 1 bits.  We get that with a sign-extension.
	 But one bit isn't enough here.  Be careful to avoid shifting outside
	 the mode and to avoid shifting outside the host wide int size.  */

      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
	      - floor_log2 (~ c) - 2);
      if (bits > 0)
	for (; bits > 0; bits--)
	  {
	    new_const = c << bits;
	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
	    if (!temp)
	      {
		new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
		temp = alpha_emit_set_const (subtarget, mode, new_const,
					     i, no_output);
	      }
	    if (temp)
	      {
		if (no_output)
		  return temp;
		return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
				     target, 0, OPTAB_WIDEN);
	      }
	  }
    }

#if HOST_BITS_PER_WIDE_INT == 64
  /* Finally, see if we can load a value into the target that is the same as
     the constant except that all bytes that are 0 are changed to be 0xff.
     If we can, then we can do a ZAPNOT to obtain the desired constant.  */

  new_const = c;
  for (i = 0; i < 64; i += 8)
    if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
      new_const |= (HOST_WIDE_INT) 0xff << i;

  /* We are only called for SImode and DImode.  If this is SImode, ensure that
     we are sign extended to a full word.  */

  if (mode == SImode)
    new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;

  if (new_const != c)
    {
      temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
      if (temp)
	{
	  if (no_output)
	    return temp;
	  return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
			       target, 0, OPTAB_WIDEN);
	}
    }
#endif

  return 0;
}

/* Try to output insns to set TARGET equal to the constant C if it can be
   done in less than N insns.  Do all computations in MODE.  Returns the place
   where the output has been placed if it can be done and the insns have been
   emitted.  If it would take more than N insns, zero is returned and no
   insns are emitted.  */

static rtx
alpha_emit_set_const (rtx target, enum machine_mode mode,
		      HOST_WIDE_INT c, int n, bool no_output)
{
  enum machine_mode orig_mode = mode;
  rtx orig_target = target;
  rtx result = 0;
  int i;

  /* If we can't make any pseudos, TARGET is an SImode hard register, we
     can't load this constant in one insn, do this in DImode.  */
  if (!can_create_pseudo_p () && mode == SImode
      && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
    {
      result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
      if (result)
	return result;

      target = no_output ? NULL : gen_lowpart (DImode, target);
      mode = DImode;
    }
  else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
    {
      target = no_output ? NULL : gen_lowpart (DImode, target);
      mode = DImode;
    }

  /* Try 1 insn, then 2, then up to N.  */
  for (i = 1; i <= n; i++)
    {
      result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
      if (result)
	{
	  rtx insn, set;

	  if (no_output)
	    return result;

	  insn = get_last_insn ();
	  set = single_set (insn);
	  if (! CONSTANT_P (SET_SRC (set)))
	    set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
	  break;
	}
    }

  /* Allow for the case where we changed the mode of TARGET.  */
  if (result)
    {
      if (result == target)
	result = orig_target;
      else if (mode != orig_mode)
	result = gen_lowpart (orig_mode, result);
    }

  return result;
}

/* Having failed to find a 3 insn sequence in alpha_emit_set_const,
   fall back to a straightforward decomposition.  We do this to avoid
   exponential run times encountered when looking for longer sequences
   with alpha_emit_set_const.  */

static rtx
alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
{
  HOST_WIDE_INT d1, d2, d3, d4;

  /* Decompose the entire word */
#if HOST_BITS_PER_WIDE_INT >= 64
  gcc_assert (c2 == -(c1 < 0));
  d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
  c1 -= d1;
  d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
  c1 = (c1 - d2) >> 32;
  d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
  c1 -= d3;
  d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
  gcc_assert (c1 == d4);
#else
  d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
  c1 -= d1;
  d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
  gcc_assert (c1 == d2);
  c2 += (d2 < 0);
  d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
  c2 -= d3;
  d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
  gcc_assert (c2 == d4);
#endif

  /* Construct the high word */
  if (d4)
    {
      emit_move_insn (target, GEN_INT (d4));
      if (d3)
	emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
    }
  else
    emit_move_insn (target, GEN_INT (d3));

  /* Shift it into place */
  emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));

  /* Add in the low bits.  */
  if (d2)
    emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
  if (d1)
    emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));

  return target;
}
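
/* Worked example: 0x0001000000020003 decomposes into d4 = 0x00010000,
   d3 = 0, d2 = 0x00020000 and d1 = 3, giving roughly the sequence
   (with $t standing for the target register)

	ldah $t,1($31)
	sll  $t,32,$t
	ldah $t,2($t)
	lda  $t,3($t)

   where d1/d3 become lda offsets and d2/d4 become ldah offsets.  */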
1978 Normally this is all symbolic constants, integral constants that 1979 take three or fewer instructions, and floating-point zero. */ 1980 1981 bool 1982 alpha_legitimate_constant_p (enum machine_mode mode, rtx x) 1983 { 1984 HOST_WIDE_INT i0, i1; 1985 1986 switch (GET_CODE (x)) 1987 { 1988 case LABEL_REF: 1989 case HIGH: 1990 return true; 1991 1992 case CONST: 1993 if (GET_CODE (XEXP (x, 0)) == PLUS 1994 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) 1995 x = XEXP (XEXP (x, 0), 0); 1996 else 1997 return true; 1998 1999 if (GET_CODE (x) != SYMBOL_REF) 2000 return true; 2001 2002 /* FALLTHRU */ 2003 2004 case SYMBOL_REF: 2005 /* TLS symbols are never valid. */ 2006 return SYMBOL_REF_TLS_MODEL (x) == 0; 2007 2008 case CONST_DOUBLE: 2009 if (x == CONST0_RTX (mode)) 2010 return true; 2011 if (FLOAT_MODE_P (mode)) 2012 return false; 2013 goto do_integer; 2014 2015 case CONST_VECTOR: 2016 if (x == CONST0_RTX (mode)) 2017 return true; 2018 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) 2019 return false; 2020 if (GET_MODE_SIZE (mode) != 8) 2021 return false; 2022 goto do_integer; 2023 2024 case CONST_INT: 2025 do_integer: 2026 if (TARGET_BUILD_CONSTANTS) 2027 return true; 2028 alpha_extract_integer (x, &i0, &i1); 2029 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == (-i0 < 0)) 2030 return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL; 2031 return false; 2032 2033 default: 2034 return false; 2035 } 2036 } 2037 2038 /* Operand 1 is known to be a constant, and should require more than one 2039 instruction to load. Emit that multi-part load. */ 2040 2041 bool 2042 alpha_split_const_mov (enum machine_mode mode, rtx *operands) 2043 { 2044 HOST_WIDE_INT i0, i1; 2045 rtx temp = NULL_RTX; 2046 2047 alpha_extract_integer (operands[1], &i0, &i1); 2048 2049 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0)) 2050 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false); 2051 2052 if (!temp && TARGET_BUILD_CONSTANTS) 2053 temp = alpha_emit_set_long_const (operands[0], i0, i1); 2054 2055 if (temp) 2056 { 2057 if (!rtx_equal_p (operands[0], temp)) 2058 emit_move_insn (operands[0], temp); 2059 return true; 2060 } 2061 2062 return false; 2063 } 2064 2065 /* Expand a move instruction; return true if all work is done. 2066 We don't handle non-bwx subword loads here. */ 2067 2068 bool 2069 alpha_expand_mov (enum machine_mode mode, rtx *operands) 2070 { 2071 rtx tmp; 2072 2073 /* If the output is not a register, the input must be. */ 2074 if (MEM_P (operands[0]) 2075 && ! reg_or_0_operand (operands[1], mode)) 2076 operands[1] = force_reg (mode, operands[1]); 2077 2078 /* Allow legitimize_address to perform some simplifications. */ 2079 if (mode == Pmode && symbolic_operand (operands[1], mode)) 2080 { 2081 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode); 2082 if (tmp) 2083 { 2084 if (tmp == operands[0]) 2085 return true; 2086 operands[1] = tmp; 2087 return false; 2088 } 2089 } 2090 2091 /* Early out for non-constants and valid constants. */ 2092 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode)) 2093 return false; 2094 2095 /* Split large integers. */ 2096 if (CONST_INT_P (operands[1]) 2097 || GET_CODE (operands[1]) == CONST_DOUBLE 2098 || GET_CODE (operands[1]) == CONST_VECTOR) 2099 { 2100 if (alpha_split_const_mov (mode, operands)) 2101 return true; 2102 } 2103 2104 /* Otherwise we've nothing left but to drop the thing to memory. 
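   That is, force the value into the constant pool and load it back from
   memory.  If this happens while reload is in progress we cannot create
   a new pseudo for the address, so the code below reuses operands[0] to
   hold the address of the pool entry and rewrites operands[1] as a
   memory reference through that register; otherwise the pool reference
   is simply run through validize_mem and handed back for the caller to
   emit as an ordinary load.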
*/ 2105 tmp = force_const_mem (mode, operands[1]); 2106 2107 if (tmp == NULL_RTX) 2108 return false; 2109 2110 if (reload_in_progress) 2111 { 2112 emit_move_insn (operands[0], XEXP (tmp, 0)); 2113 operands[1] = replace_equiv_address (tmp, operands[0]); 2114 } 2115 else 2116 operands[1] = validize_mem (tmp); 2117 return false; 2118 } 2119 2120 /* Expand a non-bwx QImode or HImode move instruction; 2121 return true if all work is done. */ 2122 2123 bool 2124 alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands) 2125 { 2126 rtx seq; 2127 2128 /* If the output is not a register, the input must be. */ 2129 if (MEM_P (operands[0])) 2130 operands[1] = force_reg (mode, operands[1]); 2131 2132 /* Handle four memory cases, unaligned and aligned for either the input 2133 or the output. The only case where we can be called during reload is 2134 for aligned loads; all other cases require temporaries. */ 2135 2136 if (any_memory_operand (operands[1], mode)) 2137 { 2138 if (aligned_memory_operand (operands[1], mode)) 2139 { 2140 if (reload_in_progress) 2141 { 2142 if (mode == QImode) 2143 seq = gen_reload_inqi_aligned (operands[0], operands[1]); 2144 else 2145 seq = gen_reload_inhi_aligned (operands[0], operands[1]); 2146 emit_insn (seq); 2147 } 2148 else 2149 { 2150 rtx aligned_mem, bitnum; 2151 rtx scratch = gen_reg_rtx (SImode); 2152 rtx subtarget; 2153 bool copyout; 2154 2155 get_aligned_mem (operands[1], &aligned_mem, &bitnum); 2156 2157 subtarget = operands[0]; 2158 if (REG_P (subtarget)) 2159 subtarget = gen_lowpart (DImode, subtarget), copyout = false; 2160 else 2161 subtarget = gen_reg_rtx (DImode), copyout = true; 2162 2163 if (mode == QImode) 2164 seq = gen_aligned_loadqi (subtarget, aligned_mem, 2165 bitnum, scratch); 2166 else 2167 seq = gen_aligned_loadhi (subtarget, aligned_mem, 2168 bitnum, scratch); 2169 emit_insn (seq); 2170 2171 if (copyout) 2172 emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); 2173 } 2174 } 2175 else 2176 { 2177 /* Don't pass these as parameters since that makes the generated 2178 code depend on parameter evaluation order which will cause 2179 bootstrap failures. 
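   Illustration only (this snippet is not part of the original sources):
   had the temporaries been created directly in the argument list, as in

     seq = gen_unaligned_loadqi (subtarget,
                                 get_unaligned_address (operands[1]),
                                 gen_reg_rtx (DImode),
                                 gen_reg_rtx (DImode));

   the pseudo-register numbers fed to the pattern would depend on the
   order in which the host compiler evaluates the call's arguments,
   which C leaves unspecified, so object files from different bootstrap
   stages could disagree.  Creating temp1 and temp2 up front pins the
   order down.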
*/ 2180 2181 rtx temp1, temp2, subtarget, ua; 2182 bool copyout; 2183 2184 temp1 = gen_reg_rtx (DImode); 2185 temp2 = gen_reg_rtx (DImode); 2186 2187 subtarget = operands[0]; 2188 if (REG_P (subtarget)) 2189 subtarget = gen_lowpart (DImode, subtarget), copyout = false; 2190 else 2191 subtarget = gen_reg_rtx (DImode), copyout = true; 2192 2193 ua = get_unaligned_address (operands[1]); 2194 if (mode == QImode) 2195 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2); 2196 else 2197 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2); 2198 2199 alpha_set_memflags (seq, operands[1]); 2200 emit_insn (seq); 2201 2202 if (copyout) 2203 emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); 2204 } 2205 return true; 2206 } 2207 2208 if (any_memory_operand (operands[0], mode)) 2209 { 2210 if (aligned_memory_operand (operands[0], mode)) 2211 { 2212 rtx aligned_mem, bitnum; 2213 rtx temp1 = gen_reg_rtx (SImode); 2214 rtx temp2 = gen_reg_rtx (SImode); 2215 2216 get_aligned_mem (operands[0], &aligned_mem, &bitnum); 2217 2218 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum, 2219 temp1, temp2)); 2220 } 2221 else 2222 { 2223 rtx temp1 = gen_reg_rtx (DImode); 2224 rtx temp2 = gen_reg_rtx (DImode); 2225 rtx temp3 = gen_reg_rtx (DImode); 2226 rtx ua = get_unaligned_address (operands[0]); 2227 2228 if (mode == QImode) 2229 seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3); 2230 else 2231 seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3); 2232 2233 alpha_set_memflags (seq, operands[0]); 2234 emit_insn (seq); 2235 } 2236 return true; 2237 } 2238 2239 return false; 2240 } 2241 2242 /* Implement the movmisalign patterns. One of the operands is a memory 2243 that is not naturally aligned. Emit instructions to load it. */ 2244 2245 void 2246 alpha_expand_movmisalign (enum machine_mode mode, rtx *operands) 2247 { 2248 /* Honor misaligned loads, for those we promised to do so. */ 2249 if (MEM_P (operands[1])) 2250 { 2251 rtx tmp; 2252 2253 if (register_operand (operands[0], mode)) 2254 tmp = operands[0]; 2255 else 2256 tmp = gen_reg_rtx (mode); 2257 2258 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0); 2259 if (tmp != operands[0]) 2260 emit_move_insn (operands[0], tmp); 2261 } 2262 else if (MEM_P (operands[0])) 2263 { 2264 if (!reg_or_0_operand (operands[1], mode)) 2265 operands[1] = force_reg (mode, operands[1]); 2266 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0); 2267 } 2268 else 2269 gcc_unreachable (); 2270 } 2271 2272 /* Generate an unsigned DImode to FP conversion. This is the same code 2273 optabs would emit if we didn't have TFmode patterns. 2274 2275 For SFmode, this is the only construction I've found that can pass 2276 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode 2277 intermediates will work, because you'll get intermediate rounding 2278 that ruins the end result. Some of this could be fixed by turning 2279 on round-to-positive-infinity, but that requires diddling the fpsr, 2280 which kills performance. I tried turning this around and converting 2281 to a negative number, so that I could turn on /m, but either I did 2282 it wrong or there's something else cause I wound up with the exact 2283 same single-bit error. 
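   In C terms, the expansion below is the usual round-to-odd halving
   trick (a sketch only, with the types spelled out; the real code
   builds RTL and uses SFmode or DFmode as the operands demand):

     double u64_to_fp (uint64_t x)
     {
       if ((int64_t) x >= 0)
         return (double) (int64_t) x;
       uint64_t h = (x >> 1) | (x & 1);
       double d = (double) (int64_t) h;
       return d + d;
     }

   ORing the discarded low bit back in keeps it "sticky", so the one
   rounding that happens when h is converted matches what a direct
   conversion of x would have produced; the final d + d is then exact.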
There is a branch-less form of this same code: 2284 2285 srl $16,1,$1 2286 and $16,1,$2 2287 cmplt $16,0,$3 2288 or $1,$2,$2 2289 cmovge $16,$16,$2 2290 itoft $3,$f10 2291 itoft $2,$f11 2292 cvtqs $f11,$f11 2293 adds $f11,$f11,$f0 2294 fcmoveq $f10,$f11,$f0 2295 2296 I'm not using it because it's the same number of instructions as 2297 this branch-full form, and it has more serialized long latency 2298 instructions on the critical path. 2299 2300 For DFmode, we can avoid rounding errors by breaking up the word 2301 into two pieces, converting them separately, and adding them back: 2302 2303 LC0: .long 0,0x5f800000 2304 2305 itoft $16,$f11 2306 lda $2,LC0 2307 cmplt $16,0,$1 2308 cpyse $f11,$f31,$f10 2309 cpyse $f31,$f11,$f11 2310 s4addq $1,$2,$1 2311 lds $f12,0($1) 2312 cvtqt $f10,$f10 2313 cvtqt $f11,$f11 2314 addt $f12,$f10,$f0 2315 addt $f0,$f11,$f0 2316 2317 This doesn't seem to be a clear-cut win over the optabs form. 2318 It probably all depends on the distribution of numbers being 2319 converted -- in the optabs form, all but high-bit-set has a 2320 much lower minimum execution time. */ 2321 2322 void 2323 alpha_emit_floatuns (rtx operands[2]) 2324 { 2325 rtx neglab, donelab, i0, i1, f0, in, out; 2326 enum machine_mode mode; 2327 2328 out = operands[0]; 2329 in = force_reg (DImode, operands[1]); 2330 mode = GET_MODE (out); 2331 neglab = gen_label_rtx (); 2332 donelab = gen_label_rtx (); 2333 i0 = gen_reg_rtx (DImode); 2334 i1 = gen_reg_rtx (DImode); 2335 f0 = gen_reg_rtx (mode); 2336 2337 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); 2338 2339 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); 2340 emit_jump_insn (gen_jump (donelab)); 2341 emit_barrier (); 2342 2343 emit_label (neglab); 2344 2345 emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); 2346 emit_insn (gen_anddi3 (i1, in, const1_rtx)); 2347 emit_insn (gen_iordi3 (i0, i0, i1)); 2348 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0))); 2349 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); 2350 2351 emit_label (donelab); 2352 } 2353 2354 /* Generate the comparison for a conditional branch. */ 2355 2356 void 2357 alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode) 2358 { 2359 enum rtx_code cmp_code, branch_code; 2360 enum machine_mode branch_mode = VOIDmode; 2361 enum rtx_code code = GET_CODE (operands[0]); 2362 rtx op0 = operands[1], op1 = operands[2]; 2363 rtx tem; 2364 2365 if (cmp_mode == TFmode) 2366 { 2367 op0 = alpha_emit_xfloating_compare (&code, op0, op1); 2368 op1 = const0_rtx; 2369 cmp_mode = DImode; 2370 } 2371 2372 /* The general case: fold the comparison code to the types of compares 2373 that we have, choosing the branch as necessary. */ 2374 switch (code) 2375 { 2376 case EQ: case LE: case LT: case LEU: case LTU: 2377 case UNORDERED: 2378 /* We have these compares. */ 2379 cmp_code = code, branch_code = NE; 2380 break; 2381 2382 case NE: 2383 case ORDERED: 2384 /* These must be reversed. */ 2385 cmp_code = reverse_condition (code), branch_code = EQ; 2386 break; 2387 2388 case GE: case GT: case GEU: case GTU: 2389 /* For FP, we swap them, for INT, we reverse them. 
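   For example, given GT: in DFmode we emit (LT b a) and branch on the
   result being nonzero, so "a > b" becomes "b < a"; in DImode we emit
   (LE a b) and branch on the result being zero, i.e. branch when
   "a <= b" is false.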
*/ 2390 if (cmp_mode == DFmode) 2391 { 2392 cmp_code = swap_condition (code); 2393 branch_code = NE; 2394 tem = op0, op0 = op1, op1 = tem; 2395 } 2396 else 2397 { 2398 cmp_code = reverse_condition (code); 2399 branch_code = EQ; 2400 } 2401 break; 2402 2403 default: 2404 gcc_unreachable (); 2405 } 2406 2407 if (cmp_mode == DFmode) 2408 { 2409 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED) 2410 { 2411 /* When we are not as concerned about non-finite values, and we 2412 are comparing against zero, we can branch directly. */ 2413 if (op1 == CONST0_RTX (DFmode)) 2414 cmp_code = UNKNOWN, branch_code = code; 2415 else if (op0 == CONST0_RTX (DFmode)) 2416 { 2417 /* Undo the swap we probably did just above. */ 2418 tem = op0, op0 = op1, op1 = tem; 2419 branch_code = swap_condition (cmp_code); 2420 cmp_code = UNKNOWN; 2421 } 2422 } 2423 else 2424 { 2425 /* ??? We mark the branch mode to be CCmode to prevent the 2426 compare and branch from being combined, since the compare 2427 insn follows IEEE rules that the branch does not. */ 2428 branch_mode = CCmode; 2429 } 2430 } 2431 else 2432 { 2433 /* The following optimizations are only for signed compares. */ 2434 if (code != LEU && code != LTU && code != GEU && code != GTU) 2435 { 2436 /* Whee. Compare and branch against 0 directly. */ 2437 if (op1 == const0_rtx) 2438 cmp_code = UNKNOWN, branch_code = code; 2439 2440 /* If the constants doesn't fit into an immediate, but can 2441 be generated by lda/ldah, we adjust the argument and 2442 compare against zero, so we can use beq/bne directly. */ 2443 /* ??? Don't do this when comparing against symbols, otherwise 2444 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will 2445 be declared false out of hand (at least for non-weak). */ 2446 else if (CONST_INT_P (op1) 2447 && (code == EQ || code == NE) 2448 && !(symbolic_operand (op0, VOIDmode) 2449 || (REG_P (op0) && REG_POINTER (op0)))) 2450 { 2451 rtx n_op1 = GEN_INT (-INTVAL (op1)); 2452 2453 if (! satisfies_constraint_I (op1) 2454 && (satisfies_constraint_K (n_op1) 2455 || satisfies_constraint_L (n_op1))) 2456 cmp_code = PLUS, branch_code = code, op1 = n_op1; 2457 } 2458 } 2459 2460 if (!reg_or_0_operand (op0, DImode)) 2461 op0 = force_reg (DImode, op0); 2462 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode)) 2463 op1 = force_reg (DImode, op1); 2464 } 2465 2466 /* Emit an initial compare instruction, if necessary. */ 2467 tem = op0; 2468 if (cmp_code != UNKNOWN) 2469 { 2470 tem = gen_reg_rtx (cmp_mode); 2471 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)); 2472 } 2473 2474 /* Emit the branch instruction. */ 2475 tem = gen_rtx_SET (VOIDmode, pc_rtx, 2476 gen_rtx_IF_THEN_ELSE (VOIDmode, 2477 gen_rtx_fmt_ee (branch_code, 2478 branch_mode, tem, 2479 CONST0_RTX (cmp_mode)), 2480 gen_rtx_LABEL_REF (VOIDmode, 2481 operands[3]), 2482 pc_rtx)); 2483 emit_jump_insn (tem); 2484 } 2485 2486 /* Certain simplifications can be done to make invalid setcc operations 2487 valid. Return the final comparison, or NULL if we can't work. 
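   (In this implementation the boolean result is computed directly into
   operands[0]; the function returns true on success and false when the
   comparison cannot be handled at all, e.g. a DFmode compare without
   TARGET_FIX.)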
*/ 2488 2489 bool 2490 alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode) 2491 { 2492 enum rtx_code cmp_code; 2493 enum rtx_code code = GET_CODE (operands[1]); 2494 rtx op0 = operands[2], op1 = operands[3]; 2495 rtx tmp; 2496 2497 if (cmp_mode == TFmode) 2498 { 2499 op0 = alpha_emit_xfloating_compare (&code, op0, op1); 2500 op1 = const0_rtx; 2501 cmp_mode = DImode; 2502 } 2503 2504 if (cmp_mode == DFmode && !TARGET_FIX) 2505 return 0; 2506 2507 /* The general case: fold the comparison code to the types of compares 2508 that we have, choosing the branch as necessary. */ 2509 2510 cmp_code = UNKNOWN; 2511 switch (code) 2512 { 2513 case EQ: case LE: case LT: case LEU: case LTU: 2514 case UNORDERED: 2515 /* We have these compares. */ 2516 if (cmp_mode == DFmode) 2517 cmp_code = code, code = NE; 2518 break; 2519 2520 case NE: 2521 if (cmp_mode == DImode && op1 == const0_rtx) 2522 break; 2523 /* FALLTHRU */ 2524 2525 case ORDERED: 2526 cmp_code = reverse_condition (code); 2527 code = EQ; 2528 break; 2529 2530 case GE: case GT: case GEU: case GTU: 2531 /* These normally need swapping, but for integer zero we have 2532 special patterns that recognize swapped operands. */ 2533 if (cmp_mode == DImode && op1 == const0_rtx) 2534 break; 2535 code = swap_condition (code); 2536 if (cmp_mode == DFmode) 2537 cmp_code = code, code = NE; 2538 tmp = op0, op0 = op1, op1 = tmp; 2539 break; 2540 2541 default: 2542 gcc_unreachable (); 2543 } 2544 2545 if (cmp_mode == DImode) 2546 { 2547 if (!register_operand (op0, DImode)) 2548 op0 = force_reg (DImode, op0); 2549 if (!reg_or_8bit_operand (op1, DImode)) 2550 op1 = force_reg (DImode, op1); 2551 } 2552 2553 /* Emit an initial compare instruction, if necessary. */ 2554 if (cmp_code != UNKNOWN) 2555 { 2556 tmp = gen_reg_rtx (cmp_mode); 2557 emit_insn (gen_rtx_SET (VOIDmode, tmp, 2558 gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1))); 2559 2560 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp; 2561 op1 = const0_rtx; 2562 } 2563 2564 /* Emit the setcc instruction. */ 2565 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 2566 gen_rtx_fmt_ee (code, DImode, op0, op1))); 2567 return true; 2568 } 2569 2570 2571 /* Rewrite a comparison against zero CMP of the form 2572 (CODE (cc0) (const_int 0)) so it can be written validly in 2573 a conditional move (if_then_else CMP ...). 2574 If both of the operands that set cc0 are nonzero we must emit 2575 an insn to perform the compare (it can't be done within 2576 the conditional move). */ 2577 2578 rtx 2579 alpha_emit_conditional_move (rtx cmp, enum machine_mode mode) 2580 { 2581 enum rtx_code code = GET_CODE (cmp); 2582 enum rtx_code cmov_code = NE; 2583 rtx op0 = XEXP (cmp, 0); 2584 rtx op1 = XEXP (cmp, 1); 2585 enum machine_mode cmp_mode 2586 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0)); 2587 enum machine_mode cmov_mode = VOIDmode; 2588 int local_fast_math = flag_unsafe_math_optimizations; 2589 rtx tem; 2590 2591 if (cmp_mode == TFmode) 2592 { 2593 op0 = alpha_emit_xfloating_compare (&code, op0, op1); 2594 op1 = const0_rtx; 2595 cmp_mode = DImode; 2596 } 2597 2598 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode); 2599 2600 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode)) 2601 { 2602 enum rtx_code cmp_code; 2603 2604 if (! TARGET_FIX) 2605 return 0; 2606 2607 /* If we have fp<->int register move instructions, do a cmov by 2608 performing the comparison in fp registers, and move the 2609 zero/nonzero value to integer registers, where we can then 2610 use a normal cmov, or vice-versa. 
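   For instance, a DImode cmov guarded by a DFmode comparison is handled
   by emitting the cmpt* into an FP register (which yields 0.0 or 2.0 on
   Alpha), reading that value back as an integer through its DImode
   lowpart (an ftoit, hence the TARGET_FIX test above), and then
   cmov'ing on the integer being nonzero; an FP cmov guarded by an
   integer comparison is the mirror image.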
*/ 2611 2612 switch (code) 2613 { 2614 case EQ: case LE: case LT: case LEU: case LTU: 2615 case UNORDERED: 2616 /* We have these compares. */ 2617 cmp_code = code, code = NE; 2618 break; 2619 2620 case NE: 2621 case ORDERED: 2622 /* These must be reversed. */ 2623 cmp_code = reverse_condition (code), code = EQ; 2624 break; 2625 2626 case GE: case GT: case GEU: case GTU: 2627 /* These normally need swapping, but for integer zero we have 2628 special patterns that recognize swapped operands. */ 2629 if (cmp_mode == DImode && op1 == const0_rtx) 2630 cmp_code = code, code = NE; 2631 else 2632 { 2633 cmp_code = swap_condition (code); 2634 code = NE; 2635 tem = op0, op0 = op1, op1 = tem; 2636 } 2637 break; 2638 2639 default: 2640 gcc_unreachable (); 2641 } 2642 2643 if (cmp_mode == DImode) 2644 { 2645 if (!reg_or_0_operand (op0, DImode)) 2646 op0 = force_reg (DImode, op0); 2647 if (!reg_or_8bit_operand (op1, DImode)) 2648 op1 = force_reg (DImode, op1); 2649 } 2650 2651 tem = gen_reg_rtx (cmp_mode); 2652 emit_insn (gen_rtx_SET (VOIDmode, tem, 2653 gen_rtx_fmt_ee (cmp_code, cmp_mode, 2654 op0, op1))); 2655 2656 cmp_mode = cmp_mode == DImode ? DFmode : DImode; 2657 op0 = gen_lowpart (cmp_mode, tem); 2658 op1 = CONST0_RTX (cmp_mode); 2659 cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); 2660 local_fast_math = 1; 2661 } 2662 2663 if (cmp_mode == DImode) 2664 { 2665 if (!reg_or_0_operand (op0, DImode)) 2666 op0 = force_reg (DImode, op0); 2667 if (!reg_or_8bit_operand (op1, DImode)) 2668 op1 = force_reg (DImode, op1); 2669 } 2670 2671 /* We may be able to use a conditional move directly. 2672 This avoids emitting spurious compares. */ 2673 if (signed_comparison_operator (cmp, VOIDmode) 2674 && (cmp_mode == DImode || local_fast_math) 2675 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode))) 2676 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); 2677 2678 /* We can't put the comparison inside the conditional move; 2679 emit a compare instruction and put that inside the 2680 conditional move. Make sure we emit only comparisons we have; 2681 swap or reverse as necessary. */ 2682 2683 if (!can_create_pseudo_p ()) 2684 return NULL_RTX; 2685 2686 switch (code) 2687 { 2688 case EQ: case LE: case LT: case LEU: case LTU: 2689 case UNORDERED: 2690 /* We have these compares: */ 2691 break; 2692 2693 case NE: 2694 case ORDERED: 2695 /* These must be reversed. */ 2696 code = reverse_condition (code); 2697 cmov_code = EQ; 2698 break; 2699 2700 case GE: case GT: case GEU: case GTU: 2701 /* These normally need swapping, but for integer zero we have 2702 special patterns that recognize swapped operands. */ 2703 if (cmp_mode == DImode && op1 == const0_rtx) 2704 break; 2705 code = swap_condition (code); 2706 tem = op0, op0 = op1, op1 = tem; 2707 break; 2708 2709 default: 2710 gcc_unreachable (); 2711 } 2712 2713 if (cmp_mode == DImode) 2714 { 2715 if (!reg_or_0_operand (op0, DImode)) 2716 op0 = force_reg (DImode, op0); 2717 if (!reg_or_8bit_operand (op1, DImode)) 2718 op1 = force_reg (DImode, op1); 2719 } 2720 2721 /* ??? We mark the branch mode to be CCmode to prevent the compare 2722 and cmov from being combined, since the compare insn follows IEEE 2723 rules that the cmov does not. 
*/ 2724 if (cmp_mode == DFmode && !local_fast_math) 2725 cmov_mode = CCmode; 2726 2727 tem = gen_reg_rtx (cmp_mode); 2728 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1)); 2729 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode)); 2730 } 2731 2732 /* Simplify a conditional move of two constants into a setcc with 2733 arithmetic. This is done with a splitter since combine would 2734 just undo the work if done during code generation. It also catches 2735 cases we wouldn't have before cse. */ 2736 2737 int 2738 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond, 2739 rtx t_rtx, rtx f_rtx) 2740 { 2741 HOST_WIDE_INT t, f, diff; 2742 enum machine_mode mode; 2743 rtx target, subtarget, tmp; 2744 2745 mode = GET_MODE (dest); 2746 t = INTVAL (t_rtx); 2747 f = INTVAL (f_rtx); 2748 diff = t - f; 2749 2750 if (((code == NE || code == EQ) && diff < 0) 2751 || (code == GE || code == GT)) 2752 { 2753 code = reverse_condition (code); 2754 diff = t, t = f, f = diff; 2755 diff = t - f; 2756 } 2757 2758 subtarget = target = dest; 2759 if (mode != DImode) 2760 { 2761 target = gen_lowpart (DImode, dest); 2762 if (can_create_pseudo_p ()) 2763 subtarget = gen_reg_rtx (DImode); 2764 else 2765 subtarget = target; 2766 } 2767 /* Below, we must be careful to use copy_rtx on target and subtarget 2768 in intermediate insns, as they may be a subreg rtx, which may not 2769 be shared. */ 2770 2771 if (f == 0 && exact_log2 (diff) > 0 2772 /* On EV6, we've got enough shifters to make non-arithmetic shifts 2773 viable over a longer latency cmove. On EV5, the E0 slot is a 2774 scarce resource, and on EV4 shift has the same latency as a cmove. */ 2775 && (diff <= 8 || alpha_tune == PROCESSOR_EV6)) 2776 { 2777 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); 2778 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp)); 2779 2780 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget), 2781 GEN_INT (exact_log2 (t))); 2782 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 2783 } 2784 else if (f == 0 && t == -1) 2785 { 2786 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); 2787 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp)); 2788 2789 emit_insn (gen_negdi2 (target, copy_rtx (subtarget))); 2790 } 2791 else if (diff == 1 || diff == 4 || diff == 8) 2792 { 2793 rtx add_op; 2794 2795 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); 2796 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp)); 2797 2798 if (diff == 1) 2799 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f))); 2800 else 2801 { 2802 add_op = GEN_INT (f); 2803 if (sext_add_operand (add_op, mode)) 2804 { 2805 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget), 2806 GEN_INT (diff)); 2807 tmp = gen_rtx_PLUS (DImode, tmp, add_op); 2808 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 2809 } 2810 else 2811 return 0; 2812 } 2813 } 2814 else 2815 return 0; 2816 2817 return 1; 2818 } 2819 2820 /* Look up the function X_floating library function name for the 2821 given operation. 
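   (One table covers the ordinary X_floating routines; a second,
   two-entry table supplies the VAX G_float / X_float conversion names
   used when TARGET_FLOAT_VAX.  The libcall rtx in each entry is created
   lazily via init_one_libfunc, choosing the OSF or VMS spelling, and is
   cached for later lookups.)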
*/ 2822 2823 struct GTY(()) xfloating_op 2824 { 2825 const enum rtx_code code; 2826 const char *const GTY((skip)) osf_func; 2827 const char *const GTY((skip)) vms_func; 2828 rtx libcall; 2829 }; 2830 2831 static GTY(()) struct xfloating_op xfloating_ops[] = 2832 { 2833 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 }, 2834 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 }, 2835 { MULT, "_OtsMulX", "OTS$MUL_X", 0 }, 2836 { DIV, "_OtsDivX", "OTS$DIV_X", 0 }, 2837 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 }, 2838 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 }, 2839 { LT, "_OtsLssX", "OTS$LSS_X", 0 }, 2840 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 }, 2841 { GT, "_OtsGtrX", "OTS$GTR_X", 0 }, 2842 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 }, 2843 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 }, 2844 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 }, 2845 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 }, 2846 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 }, 2847 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 } 2848 }; 2849 2850 static GTY(()) struct xfloating_op vax_cvt_ops[] = 2851 { 2852 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 }, 2853 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 } 2854 }; 2855 2856 static rtx 2857 alpha_lookup_xfloating_lib_func (enum rtx_code code) 2858 { 2859 struct xfloating_op *ops = xfloating_ops; 2860 long n = ARRAY_SIZE (xfloating_ops); 2861 long i; 2862 2863 gcc_assert (TARGET_HAS_XFLOATING_LIBS); 2864 2865 /* How irritating. Nothing to key off for the main table. */ 2866 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE)) 2867 { 2868 ops = vax_cvt_ops; 2869 n = ARRAY_SIZE (vax_cvt_ops); 2870 } 2871 2872 for (i = 0; i < n; ++i, ++ops) 2873 if (ops->code == code) 2874 { 2875 rtx func = ops->libcall; 2876 if (!func) 2877 { 2878 func = init_one_libfunc (TARGET_ABI_OPEN_VMS 2879 ? ops->vms_func : ops->osf_func); 2880 ops->libcall = func; 2881 } 2882 return func; 2883 } 2884 2885 gcc_unreachable (); 2886 } 2887 2888 /* Most X_floating operations take the rounding mode as an argument. 2889 Compute that here. */ 2890 2891 static int 2892 alpha_compute_xfloating_mode_arg (enum rtx_code code, 2893 enum alpha_fp_rounding_mode round) 2894 { 2895 int mode; 2896 2897 switch (round) 2898 { 2899 case ALPHA_FPRM_NORM: 2900 mode = 2; 2901 break; 2902 case ALPHA_FPRM_MINF: 2903 mode = 1; 2904 break; 2905 case ALPHA_FPRM_CHOP: 2906 mode = 0; 2907 break; 2908 case ALPHA_FPRM_DYN: 2909 mode = 4; 2910 break; 2911 default: 2912 gcc_unreachable (); 2913 2914 /* XXX For reference, round to +inf is mode = 3. */ 2915 } 2916 2917 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N) 2918 mode |= 0x10000; 2919 2920 return mode; 2921 } 2922 2923 /* Emit an X_floating library function call. 2924 2925 Note that these functions do not follow normal calling conventions: 2926 TFmode arguments are passed in two integer registers (as opposed to 2927 indirect); TFmode return values appear in R16+R17. 2928 2929 FUNC is the function to call. 2930 TARGET is where the output belongs. 2931 OPERANDS are the inputs. 2932 NOPERANDS is the count of inputs. 2933 EQUIV is the expression equivalent for the function. 
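   As a concrete example, derived from the register assignment done
   below: for a TFmode add (addtf3) the two TFmode inputs are passed in
   $16:$17 and $18:$19, the DImode rounding-mode constant in $20, and
   _OtsAddX (or OTS$ADD_X on VMS) returns the TFmode result in $16:$17.
   A comparison routine such as _OtsLssX takes just the two TFmode
   inputs and returns its -1/0/1 answer in $0 as a DImode value.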
2934 */ 2935 2936 static void 2937 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[], 2938 int noperands, rtx equiv) 2939 { 2940 rtx usage = NULL_RTX, tmp, reg; 2941 int regno = 16, i; 2942 2943 start_sequence (); 2944 2945 for (i = 0; i < noperands; ++i) 2946 { 2947 switch (GET_MODE (operands[i])) 2948 { 2949 case TFmode: 2950 reg = gen_rtx_REG (TFmode, regno); 2951 regno += 2; 2952 break; 2953 2954 case DFmode: 2955 reg = gen_rtx_REG (DFmode, regno + 32); 2956 regno += 1; 2957 break; 2958 2959 case VOIDmode: 2960 gcc_assert (CONST_INT_P (operands[i])); 2961 /* FALLTHRU */ 2962 case DImode: 2963 reg = gen_rtx_REG (DImode, regno); 2964 regno += 1; 2965 break; 2966 2967 default: 2968 gcc_unreachable (); 2969 } 2970 2971 emit_move_insn (reg, operands[i]); 2972 use_reg (&usage, reg); 2973 } 2974 2975 switch (GET_MODE (target)) 2976 { 2977 case TFmode: 2978 reg = gen_rtx_REG (TFmode, 16); 2979 break; 2980 case DFmode: 2981 reg = gen_rtx_REG (DFmode, 32); 2982 break; 2983 case DImode: 2984 reg = gen_rtx_REG (DImode, 0); 2985 break; 2986 default: 2987 gcc_unreachable (); 2988 } 2989 2990 tmp = gen_rtx_MEM (QImode, func); 2991 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx, 2992 const0_rtx, const0_rtx)); 2993 CALL_INSN_FUNCTION_USAGE (tmp) = usage; 2994 RTL_CONST_CALL_P (tmp) = 1; 2995 2996 tmp = get_insns (); 2997 end_sequence (); 2998 2999 emit_libcall_block (tmp, target, reg, equiv); 3000 } 3001 3002 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */ 3003 3004 void 3005 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[]) 3006 { 3007 rtx func; 3008 int mode; 3009 rtx out_operands[3]; 3010 3011 func = alpha_lookup_xfloating_lib_func (code); 3012 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm); 3013 3014 out_operands[0] = operands[1]; 3015 out_operands[1] = operands[2]; 3016 out_operands[2] = GEN_INT (mode); 3017 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3, 3018 gen_rtx_fmt_ee (code, TFmode, operands[1], 3019 operands[2])); 3020 } 3021 3022 /* Emit an X_floating library function call for a comparison. */ 3023 3024 static rtx 3025 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1) 3026 { 3027 enum rtx_code cmp_code, res_code; 3028 rtx func, out, operands[2], note; 3029 3030 /* X_floating library comparison functions return 3031 -1 unordered 3032 0 false 3033 1 true 3034 Convert the compare against the raw return value. */ 3035 3036 cmp_code = *pcode; 3037 switch (cmp_code) 3038 { 3039 case UNORDERED: 3040 cmp_code = EQ; 3041 res_code = LT; 3042 break; 3043 case ORDERED: 3044 cmp_code = EQ; 3045 res_code = GE; 3046 break; 3047 case NE: 3048 res_code = NE; 3049 break; 3050 case EQ: 3051 case LT: 3052 case GT: 3053 case LE: 3054 case GE: 3055 res_code = GT; 3056 break; 3057 default: 3058 gcc_unreachable (); 3059 } 3060 *pcode = res_code; 3061 3062 func = alpha_lookup_xfloating_lib_func (cmp_code); 3063 3064 operands[0] = op0; 3065 operands[1] = op1; 3066 out = gen_reg_rtx (DImode); 3067 3068 /* What's actually returned is -1,0,1, not a proper boolean value. */ 3069 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1); 3070 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE); 3071 alpha_emit_xfloating_libcall (func, out, operands, 2, note); 3072 3073 return out; 3074 } 3075 3076 /* Emit an X_floating library function call for a conversion. 
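   For FIX (and UNSIGNED_FIX, which is funnelled through the same table
   entry) the rounding-mode argument is forced to chopped rounding, so a
   TFmode-to-DImode fix becomes a call to _OtsCvtXQ with the input in
   $16:$17, the constant 0 (ALPHA_FPRM_CHOP) in $18, and the DImode
   result in $0.  FLOAT_TRUNCATE instead passes the mode derived from
   alpha_fprm, and the remaining conversions take no rounding-mode
   argument at all.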
*/ 3077 3078 void 3079 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[]) 3080 { 3081 int noperands = 1, mode; 3082 rtx out_operands[2]; 3083 rtx func; 3084 enum rtx_code code = orig_code; 3085 3086 if (code == UNSIGNED_FIX) 3087 code = FIX; 3088 3089 func = alpha_lookup_xfloating_lib_func (code); 3090 3091 out_operands[0] = operands[1]; 3092 3093 switch (code) 3094 { 3095 case FIX: 3096 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP); 3097 out_operands[1] = GEN_INT (mode); 3098 noperands = 2; 3099 break; 3100 case FLOAT_TRUNCATE: 3101 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm); 3102 out_operands[1] = GEN_INT (mode); 3103 noperands = 2; 3104 break; 3105 default: 3106 break; 3107 } 3108 3109 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands, 3110 gen_rtx_fmt_e (orig_code, 3111 GET_MODE (operands[0]), 3112 operands[1])); 3113 } 3114 3115 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of 3116 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true, 3117 guarantee that the sequence 3118 set (OP[0] OP[2]) 3119 set (OP[1] OP[3]) 3120 is valid. Naturally, output operand ordering is little-endian. 3121 This is used by *movtf_internal and *movti_internal. */ 3122 3123 void 3124 alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode, 3125 bool fixup_overlap) 3126 { 3127 switch (GET_CODE (operands[1])) 3128 { 3129 case REG: 3130 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); 3131 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1])); 3132 break; 3133 3134 case MEM: 3135 operands[3] = adjust_address (operands[1], DImode, 8); 3136 operands[2] = adjust_address (operands[1], DImode, 0); 3137 break; 3138 3139 case CONST_INT: 3140 case CONST_DOUBLE: 3141 gcc_assert (operands[1] == CONST0_RTX (mode)); 3142 operands[2] = operands[3] = const0_rtx; 3143 break; 3144 3145 default: 3146 gcc_unreachable (); 3147 } 3148 3149 switch (GET_CODE (operands[0])) 3150 { 3151 case REG: 3152 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1); 3153 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); 3154 break; 3155 3156 case MEM: 3157 operands[1] = adjust_address (operands[0], DImode, 8); 3158 operands[0] = adjust_address (operands[0], DImode, 0); 3159 break; 3160 3161 default: 3162 gcc_unreachable (); 3163 } 3164 3165 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3])) 3166 { 3167 rtx tmp; 3168 tmp = operands[0], operands[0] = operands[1], operands[1] = tmp; 3169 tmp = operands[2], operands[2] = operands[3], operands[3] = tmp; 3170 } 3171 } 3172 3173 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source, 3174 op2 is a register containing the sign bit, operation is the 3175 logical operation to be performed. */ 3176 3177 void 3178 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx)) 3179 { 3180 rtx high_bit = operands[2]; 3181 rtx scratch; 3182 int move; 3183 3184 alpha_split_tmode_pair (operands, TFmode, false); 3185 3186 /* Detect three flavors of operand overlap. */ 3187 move = 1; 3188 if (rtx_equal_p (operands[0], operands[2])) 3189 move = 0; 3190 else if (rtx_equal_p (operands[1], operands[2])) 3191 { 3192 if (rtx_equal_p (operands[0], high_bit)) 3193 move = 2; 3194 else 3195 move = -1; 3196 } 3197 3198 if (move < 0) 3199 emit_move_insn (operands[0], operands[2]); 3200 3201 /* ??? 
If the destination overlaps both source tf and high_bit, then 3202 assume source tf is dead in its entirety and use the other half 3203 for a scratch register. Otherwise "scratch" is just the proper 3204 destination register. */ 3205 scratch = operands[move < 2 ? 1 : 3]; 3206 3207 emit_insn ((*operation) (scratch, high_bit, operands[3])); 3208 3209 if (move > 0) 3210 { 3211 emit_move_insn (operands[0], operands[2]); 3212 if (move > 1) 3213 emit_move_insn (operands[1], scratch); 3214 } 3215 } 3216 3217 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting 3218 unaligned data: 3219 3220 unsigned: signed: 3221 word: ldq_u r1,X(r11) ldq_u r1,X(r11) 3222 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11) 3223 lda r3,X(r11) lda r3,X+2(r11) 3224 extwl r1,r3,r1 extql r1,r3,r1 3225 extwh r2,r3,r2 extqh r2,r3,r2 3226 or r1.r2.r1 or r1,r2,r1 3227 sra r1,48,r1 3228 3229 long: ldq_u r1,X(r11) ldq_u r1,X(r11) 3230 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11) 3231 lda r3,X(r11) lda r3,X(r11) 3232 extll r1,r3,r1 extll r1,r3,r1 3233 extlh r2,r3,r2 extlh r2,r3,r2 3234 or r1.r2.r1 addl r1,r2,r1 3235 3236 quad: ldq_u r1,X(r11) 3237 ldq_u r2,X+7(r11) 3238 lda r3,X(r11) 3239 extql r1,r3,r1 3240 extqh r2,r3,r2 3241 or r1.r2.r1 3242 */ 3243 3244 void 3245 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size, 3246 HOST_WIDE_INT ofs, int sign) 3247 { 3248 rtx meml, memh, addr, extl, exth, tmp, mema; 3249 enum machine_mode mode; 3250 3251 if (TARGET_BWX && size == 2) 3252 { 3253 meml = adjust_address (mem, QImode, ofs); 3254 memh = adjust_address (mem, QImode, ofs+1); 3255 extl = gen_reg_rtx (DImode); 3256 exth = gen_reg_rtx (DImode); 3257 emit_insn (gen_zero_extendqidi2 (extl, meml)); 3258 emit_insn (gen_zero_extendqidi2 (exth, memh)); 3259 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8), 3260 NULL, 1, OPTAB_LIB_WIDEN); 3261 addr = expand_simple_binop (DImode, IOR, extl, exth, 3262 NULL, 1, OPTAB_LIB_WIDEN); 3263 3264 if (sign && GET_MODE (tgt) != HImode) 3265 { 3266 addr = gen_lowpart (HImode, addr); 3267 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0)); 3268 } 3269 else 3270 { 3271 if (GET_MODE (tgt) != DImode) 3272 addr = gen_lowpart (GET_MODE (tgt), addr); 3273 emit_move_insn (tgt, addr); 3274 } 3275 return; 3276 } 3277 3278 meml = gen_reg_rtx (DImode); 3279 memh = gen_reg_rtx (DImode); 3280 addr = gen_reg_rtx (DImode); 3281 extl = gen_reg_rtx (DImode); 3282 exth = gen_reg_rtx (DImode); 3283 3284 mema = XEXP (mem, 0); 3285 if (GET_CODE (mema) == LO_SUM) 3286 mema = force_reg (Pmode, mema); 3287 3288 /* AND addresses cannot be in any alias set, since they may implicitly 3289 alias surrounding code. Ideally we'd have some alias set that 3290 covered all types except those with alignment 8 or higher. */ 3291 3292 tmp = change_address (mem, DImode, 3293 gen_rtx_AND (DImode, 3294 plus_constant (DImode, mema, ofs), 3295 GEN_INT (-8))); 3296 set_mem_alias_set (tmp, 0); 3297 emit_move_insn (meml, tmp); 3298 3299 tmp = change_address (mem, DImode, 3300 gen_rtx_AND (DImode, 3301 plus_constant (DImode, mema, 3302 ofs + size - 1), 3303 GEN_INT (-8))); 3304 set_mem_alias_set (tmp, 0); 3305 emit_move_insn (memh, tmp); 3306 3307 if (sign && size == 2) 3308 { 3309 emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2)); 3310 3311 emit_insn (gen_extql (extl, meml, addr)); 3312 emit_insn (gen_extqh (exth, memh, addr)); 3313 3314 /* We must use tgt here for the target. 
Alpha-vms port fails if we use 3315 addr for the target, because addr is marked as a pointer and combine 3316 knows that pointers are always sign-extended 32-bit values. */ 3317 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN); 3318 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48), 3319 addr, 1, OPTAB_WIDEN); 3320 } 3321 else 3322 { 3323 emit_move_insn (addr, plus_constant (Pmode, mema, ofs)); 3324 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr)); 3325 switch ((int) size) 3326 { 3327 case 2: 3328 emit_insn (gen_extwh (exth, memh, addr)); 3329 mode = HImode; 3330 break; 3331 case 4: 3332 emit_insn (gen_extlh (exth, memh, addr)); 3333 mode = SImode; 3334 break; 3335 case 8: 3336 emit_insn (gen_extqh (exth, memh, addr)); 3337 mode = DImode; 3338 break; 3339 default: 3340 gcc_unreachable (); 3341 } 3342 3343 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl), 3344 gen_lowpart (mode, exth), gen_lowpart (mode, tgt), 3345 sign, OPTAB_WIDEN); 3346 } 3347 3348 if (addr != tgt) 3349 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr)); 3350 } 3351 3352 /* Similarly, use ins and msk instructions to perform unaligned stores. */ 3353 3354 void 3355 alpha_expand_unaligned_store (rtx dst, rtx src, 3356 HOST_WIDE_INT size, HOST_WIDE_INT ofs) 3357 { 3358 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta; 3359 3360 if (TARGET_BWX && size == 2) 3361 { 3362 if (src != const0_rtx) 3363 { 3364 dstl = gen_lowpart (QImode, src); 3365 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8), 3366 NULL, 1, OPTAB_LIB_WIDEN); 3367 dsth = gen_lowpart (QImode, dsth); 3368 } 3369 else 3370 dstl = dsth = const0_rtx; 3371 3372 meml = adjust_address (dst, QImode, ofs); 3373 memh = adjust_address (dst, QImode, ofs+1); 3374 3375 emit_move_insn (meml, dstl); 3376 emit_move_insn (memh, dsth); 3377 return; 3378 } 3379 3380 dstl = gen_reg_rtx (DImode); 3381 dsth = gen_reg_rtx (DImode); 3382 insl = gen_reg_rtx (DImode); 3383 insh = gen_reg_rtx (DImode); 3384 3385 dsta = XEXP (dst, 0); 3386 if (GET_CODE (dsta) == LO_SUM) 3387 dsta = force_reg (Pmode, dsta); 3388 3389 /* AND addresses cannot be in any alias set, since they may implicitly 3390 alias surrounding code. Ideally we'd have some alias set that 3391 covered all types except those with alignment 8 or higher. 
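   The store itself then follows the usual ldq_u/ins/msk/or/stq_u dance:
   load the two quadwords that enclose the destination, insert the
   shifted source bytes with ins*l and ins*h, clear the bytes being
   replaced out of the old contents with msk*l and msk*h, OR the halves
   together, and write the high quadword back before the low one so that
   the degenerate fully-aligned case (both MEMs naming the same
   quadword) still ends up holding the right value.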
*/ 3392 3393 meml = change_address (dst, DImode, 3394 gen_rtx_AND (DImode, 3395 plus_constant (DImode, dsta, ofs), 3396 GEN_INT (-8))); 3397 set_mem_alias_set (meml, 0); 3398 3399 memh = change_address (dst, DImode, 3400 gen_rtx_AND (DImode, 3401 plus_constant (DImode, dsta, 3402 ofs + size - 1), 3403 GEN_INT (-8))); 3404 set_mem_alias_set (memh, 0); 3405 3406 emit_move_insn (dsth, memh); 3407 emit_move_insn (dstl, meml); 3408 3409 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs)); 3410 3411 if (src != CONST0_RTX (GET_MODE (src))) 3412 { 3413 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src), 3414 GEN_INT (size*8), addr)); 3415 3416 switch ((int) size) 3417 { 3418 case 2: 3419 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr)); 3420 break; 3421 case 4: 3422 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr)); 3423 break; 3424 case 8: 3425 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr)); 3426 break; 3427 default: 3428 gcc_unreachable (); 3429 } 3430 } 3431 3432 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr)); 3433 3434 switch ((int) size) 3435 { 3436 case 2: 3437 emit_insn (gen_mskwl (dstl, dstl, addr)); 3438 break; 3439 case 4: 3440 emit_insn (gen_mskll (dstl, dstl, addr)); 3441 break; 3442 case 8: 3443 emit_insn (gen_mskql (dstl, dstl, addr)); 3444 break; 3445 default: 3446 gcc_unreachable (); 3447 } 3448 3449 if (src != CONST0_RTX (GET_MODE (src))) 3450 { 3451 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN); 3452 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN); 3453 } 3454 3455 /* Must store high before low for degenerate case of aligned. */ 3456 emit_move_insn (memh, dsth); 3457 emit_move_insn (meml, dstl); 3458 } 3459 3460 /* The block move code tries to maximize speed by separating loads and 3461 stores at the expense of register pressure: we load all of the data 3462 before we store it back out. There are two secondary effects worth 3463 mentioning, that this speeds copying to/from aligned and unaligned 3464 buffers, and that it makes the code significantly easier to write. */ 3465 3466 #define MAX_MOVE_WORDS 8 3467 3468 /* Load an integral number of consecutive unaligned quadwords. */ 3469 3470 static void 3471 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem, 3472 HOST_WIDE_INT words, HOST_WIDE_INT ofs) 3473 { 3474 rtx const im8 = GEN_INT (-8); 3475 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1]; 3476 rtx sreg, areg, tmp, smema; 3477 HOST_WIDE_INT i; 3478 3479 smema = XEXP (smem, 0); 3480 if (GET_CODE (smema) == LO_SUM) 3481 smema = force_reg (Pmode, smema); 3482 3483 /* Generate all the tmp registers we need. */ 3484 for (i = 0; i < words; ++i) 3485 { 3486 data_regs[i] = out_regs[i]; 3487 ext_tmps[i] = gen_reg_rtx (DImode); 3488 } 3489 data_regs[words] = gen_reg_rtx (DImode); 3490 3491 if (ofs != 0) 3492 smem = adjust_address (smem, GET_MODE (smem), ofs); 3493 3494 /* Load up all of the source data. */ 3495 for (i = 0; i < words; ++i) 3496 { 3497 tmp = change_address (smem, DImode, 3498 gen_rtx_AND (DImode, 3499 plus_constant (DImode, smema, 8*i), 3500 im8)); 3501 set_mem_alias_set (tmp, 0); 3502 emit_move_insn (data_regs[i], tmp); 3503 } 3504 3505 tmp = change_address (smem, DImode, 3506 gen_rtx_AND (DImode, 3507 plus_constant (DImode, smema, 3508 8*words - 1), 3509 im8)); 3510 set_mem_alias_set (tmp, 0); 3511 emit_move_insn (data_regs[words], tmp); 3512 3513 /* Extract the half-word fragments. 
Unfortunately DEC decided to make 3514 extxh with offset zero a noop instead of zeroing the register, so 3515 we must take care of that edge condition ourselves with cmov. */ 3516 3517 sreg = copy_addr_to_reg (smema); 3518 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL, 3519 1, OPTAB_WIDEN); 3520 for (i = 0; i < words; ++i) 3521 { 3522 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg)); 3523 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg)); 3524 emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i], 3525 gen_rtx_IF_THEN_ELSE (DImode, 3526 gen_rtx_EQ (DImode, areg, 3527 const0_rtx), 3528 const0_rtx, ext_tmps[i]))); 3529 } 3530 3531 /* Merge the half-words into whole words. */ 3532 for (i = 0; i < words; ++i) 3533 { 3534 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i], 3535 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN); 3536 } 3537 } 3538 3539 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS 3540 may be NULL to store zeros. */ 3541 3542 static void 3543 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem, 3544 HOST_WIDE_INT words, HOST_WIDE_INT ofs) 3545 { 3546 rtx const im8 = GEN_INT (-8); 3547 rtx ins_tmps[MAX_MOVE_WORDS]; 3548 rtx st_tmp_1, st_tmp_2, dreg; 3549 rtx st_addr_1, st_addr_2, dmema; 3550 HOST_WIDE_INT i; 3551 3552 dmema = XEXP (dmem, 0); 3553 if (GET_CODE (dmema) == LO_SUM) 3554 dmema = force_reg (Pmode, dmema); 3555 3556 /* Generate all the tmp registers we need. */ 3557 if (data_regs != NULL) 3558 for (i = 0; i < words; ++i) 3559 ins_tmps[i] = gen_reg_rtx(DImode); 3560 st_tmp_1 = gen_reg_rtx(DImode); 3561 st_tmp_2 = gen_reg_rtx(DImode); 3562 3563 if (ofs != 0) 3564 dmem = adjust_address (dmem, GET_MODE (dmem), ofs); 3565 3566 st_addr_2 = change_address (dmem, DImode, 3567 gen_rtx_AND (DImode, 3568 plus_constant (DImode, dmema, 3569 words*8 - 1), 3570 im8)); 3571 set_mem_alias_set (st_addr_2, 0); 3572 3573 st_addr_1 = change_address (dmem, DImode, 3574 gen_rtx_AND (DImode, dmema, im8)); 3575 set_mem_alias_set (st_addr_1, 0); 3576 3577 /* Load up the destination end bits. */ 3578 emit_move_insn (st_tmp_2, st_addr_2); 3579 emit_move_insn (st_tmp_1, st_addr_1); 3580 3581 /* Shift the input data into place. */ 3582 dreg = copy_addr_to_reg (dmema); 3583 if (data_regs != NULL) 3584 { 3585 for (i = words-1; i >= 0; --i) 3586 { 3587 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg)); 3588 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg)); 3589 } 3590 for (i = words-1; i > 0; --i) 3591 { 3592 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i], 3593 ins_tmps[i-1], ins_tmps[i-1], 1, 3594 OPTAB_WIDEN); 3595 } 3596 } 3597 3598 /* Split and merge the ends with the destination data. */ 3599 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg)); 3600 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg)); 3601 3602 if (data_regs != NULL) 3603 { 3604 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1], 3605 st_tmp_2, 1, OPTAB_WIDEN); 3606 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0], 3607 st_tmp_1, 1, OPTAB_WIDEN); 3608 } 3609 3610 /* Store it all. */ 3611 emit_move_insn (st_addr_2, st_tmp_2); 3612 for (i = words-1; i > 0; --i) 3613 { 3614 rtx tmp = change_address (dmem, DImode, 3615 gen_rtx_AND (DImode, 3616 plus_constant (DImode, 3617 dmema, i*8), 3618 im8)); 3619 set_mem_alias_set (tmp, 0); 3620 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx); 3621 } 3622 emit_move_insn (st_addr_1, st_tmp_1); 3623 } 3624 3625 3626 /* Expand string/block move operations. 
3627 3628 operands[0] is the pointer to the destination. 3629 operands[1] is the pointer to the source. 3630 operands[2] is the number of bytes to move. 3631 operands[3] is the alignment. */ 3632 3633 int 3634 alpha_expand_block_move (rtx operands[]) 3635 { 3636 rtx bytes_rtx = operands[2]; 3637 rtx align_rtx = operands[3]; 3638 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); 3639 HOST_WIDE_INT bytes = orig_bytes; 3640 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT; 3641 HOST_WIDE_INT dst_align = src_align; 3642 rtx orig_src = operands[1]; 3643 rtx orig_dst = operands[0]; 3644 rtx data_regs[2 * MAX_MOVE_WORDS + 16]; 3645 rtx tmp; 3646 unsigned int i, words, ofs, nregs = 0; 3647 3648 if (orig_bytes <= 0) 3649 return 1; 3650 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) 3651 return 0; 3652 3653 /* Look for additional alignment information from recorded register info. */ 3654 3655 tmp = XEXP (orig_src, 0); 3656 if (REG_P (tmp)) 3657 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp))); 3658 else if (GET_CODE (tmp) == PLUS 3659 && REG_P (XEXP (tmp, 0)) 3660 && CONST_INT_P (XEXP (tmp, 1))) 3661 { 3662 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); 3663 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); 3664 3665 if (a > src_align) 3666 { 3667 if (a >= 64 && c % 8 == 0) 3668 src_align = 64; 3669 else if (a >= 32 && c % 4 == 0) 3670 src_align = 32; 3671 else if (a >= 16 && c % 2 == 0) 3672 src_align = 16; 3673 } 3674 } 3675 3676 tmp = XEXP (orig_dst, 0); 3677 if (REG_P (tmp)) 3678 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp))); 3679 else if (GET_CODE (tmp) == PLUS 3680 && REG_P (XEXP (tmp, 0)) 3681 && CONST_INT_P (XEXP (tmp, 1))) 3682 { 3683 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); 3684 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); 3685 3686 if (a > dst_align) 3687 { 3688 if (a >= 64 && c % 8 == 0) 3689 dst_align = 64; 3690 else if (a >= 32 && c % 4 == 0) 3691 dst_align = 32; 3692 else if (a >= 16 && c % 2 == 0) 3693 dst_align = 16; 3694 } 3695 } 3696 3697 ofs = 0; 3698 if (src_align >= 64 && bytes >= 8) 3699 { 3700 words = bytes / 8; 3701 3702 for (i = 0; i < words; ++i) 3703 data_regs[nregs + i] = gen_reg_rtx (DImode); 3704 3705 for (i = 0; i < words; ++i) 3706 emit_move_insn (data_regs[nregs + i], 3707 adjust_address (orig_src, DImode, ofs + i * 8)); 3708 3709 nregs += words; 3710 bytes -= words * 8; 3711 ofs += words * 8; 3712 } 3713 3714 if (src_align >= 32 && bytes >= 4) 3715 { 3716 words = bytes / 4; 3717 3718 for (i = 0; i < words; ++i) 3719 data_regs[nregs + i] = gen_reg_rtx (SImode); 3720 3721 for (i = 0; i < words; ++i) 3722 emit_move_insn (data_regs[nregs + i], 3723 adjust_address (orig_src, SImode, ofs + i * 4)); 3724 3725 nregs += words; 3726 bytes -= words * 4; 3727 ofs += words * 4; 3728 } 3729 3730 if (bytes >= 8) 3731 { 3732 words = bytes / 8; 3733 3734 for (i = 0; i < words+1; ++i) 3735 data_regs[nregs + i] = gen_reg_rtx (DImode); 3736 3737 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src, 3738 words, ofs); 3739 3740 nregs += words; 3741 bytes -= words * 8; 3742 ofs += words * 8; 3743 } 3744 3745 if (! 
TARGET_BWX && bytes >= 4) 3746 { 3747 data_regs[nregs++] = tmp = gen_reg_rtx (SImode); 3748 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0); 3749 bytes -= 4; 3750 ofs += 4; 3751 } 3752 3753 if (bytes >= 2) 3754 { 3755 if (src_align >= 16) 3756 { 3757 do { 3758 data_regs[nregs++] = tmp = gen_reg_rtx (HImode); 3759 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs)); 3760 bytes -= 2; 3761 ofs += 2; 3762 } while (bytes >= 2); 3763 } 3764 else if (! TARGET_BWX) 3765 { 3766 data_regs[nregs++] = tmp = gen_reg_rtx (HImode); 3767 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0); 3768 bytes -= 2; 3769 ofs += 2; 3770 } 3771 } 3772 3773 while (bytes > 0) 3774 { 3775 data_regs[nregs++] = tmp = gen_reg_rtx (QImode); 3776 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs)); 3777 bytes -= 1; 3778 ofs += 1; 3779 } 3780 3781 gcc_assert (nregs <= ARRAY_SIZE (data_regs)); 3782 3783 /* Now save it back out again. */ 3784 3785 i = 0, ofs = 0; 3786 3787 /* Write out the data in whatever chunks reading the source allowed. */ 3788 if (dst_align >= 64) 3789 { 3790 while (i < nregs && GET_MODE (data_regs[i]) == DImode) 3791 { 3792 emit_move_insn (adjust_address (orig_dst, DImode, ofs), 3793 data_regs[i]); 3794 ofs += 8; 3795 i++; 3796 } 3797 } 3798 3799 if (dst_align >= 32) 3800 { 3801 /* If the source has remaining DImode regs, write them out in 3802 two pieces. */ 3803 while (i < nregs && GET_MODE (data_regs[i]) == DImode) 3804 { 3805 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32), 3806 NULL_RTX, 1, OPTAB_WIDEN); 3807 3808 emit_move_insn (adjust_address (orig_dst, SImode, ofs), 3809 gen_lowpart (SImode, data_regs[i])); 3810 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4), 3811 gen_lowpart (SImode, tmp)); 3812 ofs += 8; 3813 i++; 3814 } 3815 3816 while (i < nregs && GET_MODE (data_regs[i]) == SImode) 3817 { 3818 emit_move_insn (adjust_address (orig_dst, SImode, ofs), 3819 data_regs[i]); 3820 ofs += 4; 3821 i++; 3822 } 3823 } 3824 3825 if (i < nregs && GET_MODE (data_regs[i]) == DImode) 3826 { 3827 /* Write out a remaining block of words using unaligned methods. */ 3828 3829 for (words = 1; i + words < nregs; words++) 3830 if (GET_MODE (data_regs[i + words]) != DImode) 3831 break; 3832 3833 if (words == 1) 3834 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); 3835 else 3836 alpha_expand_unaligned_store_words (data_regs + i, orig_dst, 3837 words, ofs); 3838 3839 i += words; 3840 ofs += words * 8; 3841 } 3842 3843 /* Due to the above, this won't be aligned. */ 3844 /* ??? If we have more than one of these, consider constructing full 3845 words in registers and using alpha_expand_unaligned_store_words. */ 3846 while (i < nregs && GET_MODE (data_regs[i]) == SImode) 3847 { 3848 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); 3849 ofs += 4; 3850 i++; 3851 } 3852 3853 if (dst_align >= 16) 3854 while (i < nregs && GET_MODE (data_regs[i]) == HImode) 3855 { 3856 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]); 3857 i++; 3858 ofs += 2; 3859 } 3860 else 3861 while (i < nregs && GET_MODE (data_regs[i]) == HImode) 3862 { 3863 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); 3864 i++; 3865 ofs += 2; 3866 } 3867 3868 /* The remainder must be byte copies. 
*/ 3869 while (i < nregs) 3870 { 3871 gcc_assert (GET_MODE (data_regs[i]) == QImode); 3872 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]); 3873 i++; 3874 ofs += 1; 3875 } 3876 3877 return 1; 3878 } 3879 3880 int 3881 alpha_expand_block_clear (rtx operands[]) 3882 { 3883 rtx bytes_rtx = operands[1]; 3884 rtx align_rtx = operands[3]; 3885 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); 3886 HOST_WIDE_INT bytes = orig_bytes; 3887 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT; 3888 HOST_WIDE_INT alignofs = 0; 3889 rtx orig_dst = operands[0]; 3890 rtx tmp; 3891 int i, words, ofs = 0; 3892 3893 if (orig_bytes <= 0) 3894 return 1; 3895 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) 3896 return 0; 3897 3898 /* Look for stricter alignment. */ 3899 tmp = XEXP (orig_dst, 0); 3900 if (REG_P (tmp)) 3901 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp))); 3902 else if (GET_CODE (tmp) == PLUS 3903 && REG_P (XEXP (tmp, 0)) 3904 && CONST_INT_P (XEXP (tmp, 1))) 3905 { 3906 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); 3907 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); 3908 3909 if (a > align) 3910 { 3911 if (a >= 64) 3912 align = a, alignofs = 8 - c % 8; 3913 else if (a >= 32) 3914 align = a, alignofs = 4 - c % 4; 3915 else if (a >= 16) 3916 align = a, alignofs = 2 - c % 2; 3917 } 3918 } 3919 3920 /* Handle an unaligned prefix first. */ 3921 3922 if (alignofs > 0) 3923 { 3924 #if HOST_BITS_PER_WIDE_INT >= 64 3925 /* Given that alignofs is bounded by align, the only time BWX could 3926 generate three stores is for a 7 byte fill. Prefer two individual 3927 stores over a load/mask/store sequence. */ 3928 if ((!TARGET_BWX || alignofs == 7) 3929 && align >= 32 3930 && !(alignofs == 4 && bytes >= 4)) 3931 { 3932 enum machine_mode mode = (align >= 64 ? DImode : SImode); 3933 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs; 3934 rtx mem, tmp; 3935 HOST_WIDE_INT mask; 3936 3937 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs); 3938 set_mem_alias_set (mem, 0); 3939 3940 mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8)); 3941 if (bytes < alignofs) 3942 { 3943 mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8); 3944 ofs += bytes; 3945 bytes = 0; 3946 } 3947 else 3948 { 3949 bytes -= alignofs; 3950 ofs += alignofs; 3951 } 3952 alignofs = 0; 3953 3954 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), 3955 NULL_RTX, 1, OPTAB_WIDEN); 3956 3957 emit_move_insn (mem, tmp); 3958 } 3959 #endif 3960 3961 if (TARGET_BWX && (alignofs & 1) && bytes >= 1) 3962 { 3963 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); 3964 bytes -= 1; 3965 ofs += 1; 3966 alignofs -= 1; 3967 } 3968 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2) 3969 { 3970 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx); 3971 bytes -= 2; 3972 ofs += 2; 3973 alignofs -= 2; 3974 } 3975 if (alignofs == 4 && bytes >= 4) 3976 { 3977 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); 3978 bytes -= 4; 3979 ofs += 4; 3980 alignofs = 0; 3981 } 3982 3983 /* If we've not used the extra lead alignment information by now, 3984 we won't be able to. Downgrade align to match what's left over. */ 3985 if (alignofs > 0) 3986 { 3987 alignofs = alignofs & -alignofs; 3988 align = MIN (align, alignofs * BITS_PER_UNIT); 3989 } 3990 } 3991 3992 /* Handle a block of contiguous long-words. 
*/ 3993 3994 if (align >= 64 && bytes >= 8) 3995 { 3996 words = bytes / 8; 3997 3998 for (i = 0; i < words; ++i) 3999 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8), 4000 const0_rtx); 4001 4002 bytes -= words * 8; 4003 ofs += words * 8; 4004 } 4005 4006 /* If the block is large and appropriately aligned, emit a single 4007 store followed by a sequence of stq_u insns. */ 4008 4009 if (align >= 32 && bytes > 16) 4010 { 4011 rtx orig_dsta; 4012 4013 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); 4014 bytes -= 4; 4015 ofs += 4; 4016 4017 orig_dsta = XEXP (orig_dst, 0); 4018 if (GET_CODE (orig_dsta) == LO_SUM) 4019 orig_dsta = force_reg (Pmode, orig_dsta); 4020 4021 words = bytes / 8; 4022 for (i = 0; i < words; ++i) 4023 { 4024 rtx mem 4025 = change_address (orig_dst, DImode, 4026 gen_rtx_AND (DImode, 4027 plus_constant (DImode, orig_dsta, 4028 ofs + i*8), 4029 GEN_INT (-8))); 4030 set_mem_alias_set (mem, 0); 4031 emit_move_insn (mem, const0_rtx); 4032 } 4033 4034 /* Depending on the alignment, the first stq_u may have overlapped 4035 with the initial stl, which means that the last stq_u didn't 4036 write as much as it would appear. Leave those questionable bytes 4037 unaccounted for. */ 4038 bytes -= words * 8 - 4; 4039 ofs += words * 8 - 4; 4040 } 4041 4042 /* Handle a smaller block of aligned words. */ 4043 4044 if ((align >= 64 && bytes == 4) 4045 || (align == 32 && bytes >= 4)) 4046 { 4047 words = bytes / 4; 4048 4049 for (i = 0; i < words; ++i) 4050 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4), 4051 const0_rtx); 4052 4053 bytes -= words * 4; 4054 ofs += words * 4; 4055 } 4056 4057 /* An unaligned block uses stq_u stores for as many as possible. */ 4058 4059 if (bytes >= 8) 4060 { 4061 words = bytes / 8; 4062 4063 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs); 4064 4065 bytes -= words * 8; 4066 ofs += words * 8; 4067 } 4068 4069 /* Next clean up any trailing pieces. */ 4070 4071 #if HOST_BITS_PER_WIDE_INT >= 64 4072 /* Count the number of bits in BYTES for which aligned stores could 4073 be emitted. */ 4074 words = 0; 4075 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1) 4076 if (bytes & i) 4077 words += 1; 4078 4079 /* If we have appropriate alignment (and it wouldn't take too many 4080 instructions otherwise), mask out the bytes we need. */ 4081 if (TARGET_BWX ? 
words > 2 : bytes > 0) 4082 { 4083 if (align >= 64) 4084 { 4085 rtx mem, tmp; 4086 HOST_WIDE_INT mask; 4087 4088 mem = adjust_address (orig_dst, DImode, ofs); 4089 set_mem_alias_set (mem, 0); 4090 4091 mask = ~(HOST_WIDE_INT)0 << (bytes * 8); 4092 4093 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask), 4094 NULL_RTX, 1, OPTAB_WIDEN); 4095 4096 emit_move_insn (mem, tmp); 4097 return 1; 4098 } 4099 else if (align >= 32 && bytes < 4) 4100 { 4101 rtx mem, tmp; 4102 HOST_WIDE_INT mask; 4103 4104 mem = adjust_address (orig_dst, SImode, ofs); 4105 set_mem_alias_set (mem, 0); 4106 4107 mask = ~(HOST_WIDE_INT)0 << (bytes * 8); 4108 4109 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask), 4110 NULL_RTX, 1, OPTAB_WIDEN); 4111 4112 emit_move_insn (mem, tmp); 4113 return 1; 4114 } 4115 } 4116 #endif 4117 4118 if (!TARGET_BWX && bytes >= 4) 4119 { 4120 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs); 4121 bytes -= 4; 4122 ofs += 4; 4123 } 4124 4125 if (bytes >= 2) 4126 { 4127 if (align >= 16) 4128 { 4129 do { 4130 emit_move_insn (adjust_address (orig_dst, HImode, ofs), 4131 const0_rtx); 4132 bytes -= 2; 4133 ofs += 2; 4134 } while (bytes >= 2); 4135 } 4136 else if (! TARGET_BWX) 4137 { 4138 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs); 4139 bytes -= 2; 4140 ofs += 2; 4141 } 4142 } 4143 4144 while (bytes > 0) 4145 { 4146 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); 4147 bytes -= 1; 4148 ofs += 1; 4149 } 4150 4151 return 1; 4152 } 4153 4154 /* Returns a mask so that zap(x, value) == x & mask. */ 4155 4156 rtx 4157 alpha_expand_zap_mask (HOST_WIDE_INT value) 4158 { 4159 rtx result; 4160 int i; 4161 4162 if (HOST_BITS_PER_WIDE_INT >= 64) 4163 { 4164 HOST_WIDE_INT mask = 0; 4165 4166 for (i = 7; i >= 0; --i) 4167 { 4168 mask <<= 8; 4169 if (!((value >> i) & 1)) 4170 mask |= 0xff; 4171 } 4172 4173 result = gen_int_mode (mask, DImode); 4174 } 4175 else 4176 { 4177 HOST_WIDE_INT mask_lo = 0, mask_hi = 0; 4178 4179 gcc_assert (HOST_BITS_PER_WIDE_INT == 32); 4180 4181 for (i = 7; i >= 4; --i) 4182 { 4183 mask_hi <<= 8; 4184 if (!((value >> i) & 1)) 4185 mask_hi |= 0xff; 4186 } 4187 4188 for (i = 3; i >= 0; --i) 4189 { 4190 mask_lo <<= 8; 4191 if (!((value >> i) & 1)) 4192 mask_lo |= 0xff; 4193 } 4194 4195 result = immed_double_const (mask_lo, mask_hi, DImode); 4196 } 4197 4198 return result; 4199 } 4200 4201 void 4202 alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx), 4203 enum machine_mode mode, 4204 rtx op0, rtx op1, rtx op2) 4205 { 4206 op0 = gen_lowpart (mode, op0); 4207 4208 if (op1 == const0_rtx) 4209 op1 = CONST0_RTX (mode); 4210 else 4211 op1 = gen_lowpart (mode, op1); 4212 4213 if (op2 == const0_rtx) 4214 op2 = CONST0_RTX (mode); 4215 else 4216 op2 = gen_lowpart (mode, op2); 4217 4218 emit_insn ((*gen) (op0, op1, op2)); 4219 } 4220 4221 /* A subroutine of the atomic operation splitters. Jump to LABEL if 4222 COND is true. Mark the jump as unlikely to be taken. */ 4223 4224 static void 4225 emit_unlikely_jump (rtx cond, rtx label) 4226 { 4227 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1); 4228 rtx x; 4229 4230 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); 4231 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x)); 4232 add_reg_note (x, REG_BR_PROB, very_unlikely); 4233 } 4234 4235 /* A subroutine of the atomic operation splitters. Emit a load-locked 4236 instruction in MODE. 
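   On Alpha this is ldl_l for SImode and ldq_l for DImode; every use below
   pairs it with the matching store-conditional (stl_c/stq_c) from
   emit_store_conditional and a branch back on failure, the usual LL/SC
   idiom.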
*/ 4237 4238 static void 4239 emit_load_locked (enum machine_mode mode, rtx reg, rtx mem) 4240 { 4241 rtx (*fn) (rtx, rtx) = NULL; 4242 if (mode == SImode) 4243 fn = gen_load_locked_si; 4244 else if (mode == DImode) 4245 fn = gen_load_locked_di; 4246 emit_insn (fn (reg, mem)); 4247 } 4248 4249 /* A subroutine of the atomic operation splitters. Emit a store-conditional 4250 instruction in MODE. */ 4251 4252 static void 4253 emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val) 4254 { 4255 rtx (*fn) (rtx, rtx, rtx) = NULL; 4256 if (mode == SImode) 4257 fn = gen_store_conditional_si; 4258 else if (mode == DImode) 4259 fn = gen_store_conditional_di; 4260 emit_insn (fn (res, mem, val)); 4261 } 4262 4263 /* Subroutines of the atomic operation splitters. Emit barriers 4264 as needed for the memory MODEL. */ 4265 4266 static void 4267 alpha_pre_atomic_barrier (enum memmodel model) 4268 { 4269 if (need_atomic_barrier_p (model, true)) 4270 emit_insn (gen_memory_barrier ()); 4271 } 4272 4273 static void 4274 alpha_post_atomic_barrier (enum memmodel model) 4275 { 4276 if (need_atomic_barrier_p (model, false)) 4277 emit_insn (gen_memory_barrier ()); 4278 } 4279 4280 /* A subroutine of the atomic operation splitters. Emit an insxl 4281 instruction in MODE. */ 4282 4283 static rtx 4284 emit_insxl (enum machine_mode mode, rtx op1, rtx op2) 4285 { 4286 rtx ret = gen_reg_rtx (DImode); 4287 rtx (*fn) (rtx, rtx, rtx); 4288 4289 switch (mode) 4290 { 4291 case QImode: 4292 fn = gen_insbl; 4293 break; 4294 case HImode: 4295 fn = gen_inswl; 4296 break; 4297 case SImode: 4298 fn = gen_insll; 4299 break; 4300 case DImode: 4301 fn = gen_insql; 4302 break; 4303 default: 4304 gcc_unreachable (); 4305 } 4306 4307 op1 = force_reg (mode, op1); 4308 emit_insn (fn (ret, op1, op2)); 4309 4310 return ret; 4311 } 4312 4313 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation 4314 to perform. MEM is the memory on which to operate. VAL is the second 4315 operand of the binary operator. BEFORE and AFTER are optional locations to 4316 return the value of MEM either before of after the operation. SCRATCH is 4317 a scratch register. */ 4318 4319 void 4320 alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before, 4321 rtx after, rtx scratch, enum memmodel model) 4322 { 4323 enum machine_mode mode = GET_MODE (mem); 4324 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch)); 4325 4326 alpha_pre_atomic_barrier (model); 4327 4328 label = gen_label_rtx (); 4329 emit_label (label); 4330 label = gen_rtx_LABEL_REF (DImode, label); 4331 4332 if (before == NULL) 4333 before = scratch; 4334 emit_load_locked (mode, before, mem); 4335 4336 if (code == NOT) 4337 { 4338 x = gen_rtx_AND (mode, before, val); 4339 emit_insn (gen_rtx_SET (VOIDmode, val, x)); 4340 4341 x = gen_rtx_NOT (mode, val); 4342 } 4343 else 4344 x = gen_rtx_fmt_ee (code, mode, before, val); 4345 if (after) 4346 emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x))); 4347 emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); 4348 4349 emit_store_conditional (mode, cond, mem, scratch); 4350 4351 x = gen_rtx_EQ (DImode, cond, const0_rtx); 4352 emit_unlikely_jump (x, label); 4353 4354 alpha_post_atomic_barrier (model); 4355 } 4356 4357 /* Expand a compare and swap operation. 
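   For a strong DImode compare-and-swap the splitter below emits, roughly
   (operands symbolic, barriers implied by the memory model omitted):

	1:	ldq_l	retval,0(mem)
		cmpeq	retval,oldval,cond
		beq	cond,2f
		mov	newval,cond
		stq_c	cond,0(mem)
		beq	cond,1b
	2:

   A weak compare-and-swap simply drops the backward retry branch.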
*/ 4358 4359 void 4360 alpha_split_compare_and_swap (rtx operands[]) 4361 { 4362 rtx cond, retval, mem, oldval, newval; 4363 bool is_weak; 4364 enum memmodel mod_s, mod_f; 4365 enum machine_mode mode; 4366 rtx label1, label2, x; 4367 4368 cond = operands[0]; 4369 retval = operands[1]; 4370 mem = operands[2]; 4371 oldval = operands[3]; 4372 newval = operands[4]; 4373 is_weak = (operands[5] != const0_rtx); 4374 mod_s = (enum memmodel) INTVAL (operands[6]); 4375 mod_f = (enum memmodel) INTVAL (operands[7]); 4376 mode = GET_MODE (mem); 4377 4378 alpha_pre_atomic_barrier (mod_s); 4379 4380 label1 = NULL_RTX; 4381 if (!is_weak) 4382 { 4383 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); 4384 emit_label (XEXP (label1, 0)); 4385 } 4386 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); 4387 4388 emit_load_locked (mode, retval, mem); 4389 4390 x = gen_lowpart (DImode, retval); 4391 if (oldval == const0_rtx) 4392 { 4393 emit_move_insn (cond, const0_rtx); 4394 x = gen_rtx_NE (DImode, x, const0_rtx); 4395 } 4396 else 4397 { 4398 x = gen_rtx_EQ (DImode, x, oldval); 4399 emit_insn (gen_rtx_SET (VOIDmode, cond, x)); 4400 x = gen_rtx_EQ (DImode, cond, const0_rtx); 4401 } 4402 emit_unlikely_jump (x, label2); 4403 4404 emit_move_insn (cond, newval); 4405 emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond)); 4406 4407 if (!is_weak) 4408 { 4409 x = gen_rtx_EQ (DImode, cond, const0_rtx); 4410 emit_unlikely_jump (x, label1); 4411 } 4412 4413 if (mod_f != MEMMODEL_RELAXED) 4414 emit_label (XEXP (label2, 0)); 4415 4416 alpha_post_atomic_barrier (mod_s); 4417 4418 if (mod_f == MEMMODEL_RELAXED) 4419 emit_label (XEXP (label2, 0)); 4420 } 4421 4422 void 4423 alpha_expand_compare_and_swap_12 (rtx operands[]) 4424 { 4425 rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f; 4426 enum machine_mode mode; 4427 rtx addr, align, wdst; 4428 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx); 4429 4430 cond = operands[0]; 4431 dst = operands[1]; 4432 mem = operands[2]; 4433 oldval = operands[3]; 4434 newval = operands[4]; 4435 is_weak = operands[5]; 4436 mod_s = operands[6]; 4437 mod_f = operands[7]; 4438 mode = GET_MODE (mem); 4439 4440 /* We forced the address into a register via mem_noofs_operand. 
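   From here the narrow value is handled via its containing aligned
   quadword: ALIGN below is ADDR with the low three bits masked off,
   OLDVAL is zero-extended to DImode, and NEWVAL is pre-shifted into
   position with insbl/inswl so that the _12 splitter only has to
   ldq_l, extract, compare, mask, merge and stq_c.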
*/ 4441 addr = XEXP (mem, 0); 4442 gcc_assert (register_operand (addr, DImode)); 4443 4444 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), 4445 NULL_RTX, 1, OPTAB_DIRECT); 4446 4447 oldval = convert_modes (DImode, mode, oldval, 1); 4448 4449 if (newval != const0_rtx) 4450 newval = emit_insxl (mode, newval, addr); 4451 4452 wdst = gen_reg_rtx (DImode); 4453 if (mode == QImode) 4454 gen = gen_atomic_compare_and_swapqi_1; 4455 else 4456 gen = gen_atomic_compare_and_swaphi_1; 4457 emit_insn (gen (cond, wdst, mem, oldval, newval, align, 4458 is_weak, mod_s, mod_f)); 4459 4460 emit_move_insn (dst, gen_lowpart (mode, wdst)); 4461 } 4462 4463 void 4464 alpha_split_compare_and_swap_12 (rtx operands[]) 4465 { 4466 rtx cond, dest, orig_mem, oldval, newval, align, scratch; 4467 enum machine_mode mode; 4468 bool is_weak; 4469 enum memmodel mod_s, mod_f; 4470 rtx label1, label2, mem, addr, width, mask, x; 4471 4472 cond = operands[0]; 4473 dest = operands[1]; 4474 orig_mem = operands[2]; 4475 oldval = operands[3]; 4476 newval = operands[4]; 4477 align = operands[5]; 4478 is_weak = (operands[6] != const0_rtx); 4479 mod_s = (enum memmodel) INTVAL (operands[7]); 4480 mod_f = (enum memmodel) INTVAL (operands[8]); 4481 scratch = operands[9]; 4482 mode = GET_MODE (orig_mem); 4483 addr = XEXP (orig_mem, 0); 4484 4485 mem = gen_rtx_MEM (DImode, align); 4486 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); 4487 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) 4488 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); 4489 4490 alpha_pre_atomic_barrier (mod_s); 4491 4492 label1 = NULL_RTX; 4493 if (!is_weak) 4494 { 4495 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); 4496 emit_label (XEXP (label1, 0)); 4497 } 4498 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); 4499 4500 emit_load_locked (DImode, scratch, mem); 4501 4502 width = GEN_INT (GET_MODE_BITSIZE (mode)); 4503 mask = GEN_INT (mode == QImode ? 0xff : 0xffff); 4504 emit_insn (gen_extxl (dest, scratch, width, addr)); 4505 4506 if (oldval == const0_rtx) 4507 { 4508 emit_move_insn (cond, const0_rtx); 4509 x = gen_rtx_NE (DImode, dest, const0_rtx); 4510 } 4511 else 4512 { 4513 x = gen_rtx_EQ (DImode, dest, oldval); 4514 emit_insn (gen_rtx_SET (VOIDmode, cond, x)); 4515 x = gen_rtx_EQ (DImode, cond, const0_rtx); 4516 } 4517 emit_unlikely_jump (x, label2); 4518 4519 emit_insn (gen_mskxl (cond, scratch, mask, addr)); 4520 4521 if (newval != const0_rtx) 4522 emit_insn (gen_iordi3 (cond, cond, newval)); 4523 4524 emit_store_conditional (DImode, cond, mem, cond); 4525 4526 if (!is_weak) 4527 { 4528 x = gen_rtx_EQ (DImode, cond, const0_rtx); 4529 emit_unlikely_jump (x, label1); 4530 } 4531 4532 if (mod_f != MEMMODEL_RELAXED) 4533 emit_label (XEXP (label2, 0)); 4534 4535 alpha_post_atomic_barrier (mod_s); 4536 4537 if (mod_f == MEMMODEL_RELAXED) 4538 emit_label (XEXP (label2, 0)); 4539 } 4540 4541 /* Expand an atomic exchange operation. 
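   The loop emitted below is the plain LL/SC exchange, roughly (operands
   symbolic, barriers omitted):

	1:	ldq_l	retval,0(mem)
		mov	val,scratch
		stq_c	scratch,0(mem)
		beq	scratch,1b
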
*/ 4542 4543 void 4544 alpha_split_atomic_exchange (rtx operands[]) 4545 { 4546 rtx retval, mem, val, scratch; 4547 enum memmodel model; 4548 enum machine_mode mode; 4549 rtx label, x, cond; 4550 4551 retval = operands[0]; 4552 mem = operands[1]; 4553 val = operands[2]; 4554 model = (enum memmodel) INTVAL (operands[3]); 4555 scratch = operands[4]; 4556 mode = GET_MODE (mem); 4557 cond = gen_lowpart (DImode, scratch); 4558 4559 alpha_pre_atomic_barrier (model); 4560 4561 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); 4562 emit_label (XEXP (label, 0)); 4563 4564 emit_load_locked (mode, retval, mem); 4565 emit_move_insn (scratch, val); 4566 emit_store_conditional (mode, cond, mem, scratch); 4567 4568 x = gen_rtx_EQ (DImode, cond, const0_rtx); 4569 emit_unlikely_jump (x, label); 4570 4571 alpha_post_atomic_barrier (model); 4572 } 4573 4574 void 4575 alpha_expand_atomic_exchange_12 (rtx operands[]) 4576 { 4577 rtx dst, mem, val, model; 4578 enum machine_mode mode; 4579 rtx addr, align, wdst; 4580 rtx (*gen) (rtx, rtx, rtx, rtx, rtx); 4581 4582 dst = operands[0]; 4583 mem = operands[1]; 4584 val = operands[2]; 4585 model = operands[3]; 4586 mode = GET_MODE (mem); 4587 4588 /* We forced the address into a register via mem_noofs_operand. */ 4589 addr = XEXP (mem, 0); 4590 gcc_assert (register_operand (addr, DImode)); 4591 4592 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), 4593 NULL_RTX, 1, OPTAB_DIRECT); 4594 4595 /* Insert val into the correct byte location within the word. */ 4596 if (val != const0_rtx) 4597 val = emit_insxl (mode, val, addr); 4598 4599 wdst = gen_reg_rtx (DImode); 4600 if (mode == QImode) 4601 gen = gen_atomic_exchangeqi_1; 4602 else 4603 gen = gen_atomic_exchangehi_1; 4604 emit_insn (gen (wdst, mem, val, align, model)); 4605 4606 emit_move_insn (dst, gen_lowpart (mode, wdst)); 4607 } 4608 4609 void 4610 alpha_split_atomic_exchange_12 (rtx operands[]) 4611 { 4612 rtx dest, orig_mem, addr, val, align, scratch; 4613 rtx label, mem, width, mask, x; 4614 enum machine_mode mode; 4615 enum memmodel model; 4616 4617 dest = operands[0]; 4618 orig_mem = operands[1]; 4619 val = operands[2]; 4620 align = operands[3]; 4621 model = (enum memmodel) INTVAL (operands[4]); 4622 scratch = operands[5]; 4623 mode = GET_MODE (orig_mem); 4624 addr = XEXP (orig_mem, 0); 4625 4626 mem = gen_rtx_MEM (DImode, align); 4627 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); 4628 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) 4629 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); 4630 4631 alpha_pre_atomic_barrier (model); 4632 4633 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); 4634 emit_label (XEXP (label, 0)); 4635 4636 emit_load_locked (DImode, scratch, mem); 4637 4638 width = GEN_INT (GET_MODE_BITSIZE (mode)); 4639 mask = GEN_INT (mode == QImode ? 0xff : 0xffff); 4640 emit_insn (gen_extxl (dest, scratch, width, addr)); 4641 emit_insn (gen_mskxl (scratch, scratch, mask, addr)); 4642 if (val != const0_rtx) 4643 emit_insn (gen_iordi3 (scratch, scratch, val)); 4644 4645 emit_store_conditional (DImode, scratch, mem, scratch); 4646 4647 x = gen_rtx_EQ (DImode, scratch, const0_rtx); 4648 emit_unlikely_jump (x, label); 4649 4650 alpha_post_atomic_barrier (model); 4651 } 4652 4653 /* Adjust the cost of a scheduling dependency. Return the new cost of 4654 a dependency LINK or INSN on DEP_INSN. COST is the current cost. 
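   For example, with -mmemory-latency=3 a true dependence on a load
   (TYPE_ILD, TYPE_FLD or TYPE_LDSYM) gets its cost bumped by
   alpha_memory_latency - 1 = 2 cycles here, while anti- and output
   dependences are returned unchanged.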
*/ 4655 4656 static int 4657 alpha_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) 4658 { 4659 enum attr_type dep_insn_type; 4660 4661 /* If the dependence is an anti-dependence, there is no cost. For an 4662 output dependence, there is sometimes a cost, but it doesn't seem 4663 worth handling those few cases. */ 4664 if (REG_NOTE_KIND (link) != 0) 4665 return cost; 4666 4667 /* If we can't recognize the insns, we can't really do anything. */ 4668 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) 4669 return cost; 4670 4671 dep_insn_type = get_attr_type (dep_insn); 4672 4673 /* Bring in the user-defined memory latency. */ 4674 if (dep_insn_type == TYPE_ILD 4675 || dep_insn_type == TYPE_FLD 4676 || dep_insn_type == TYPE_LDSYM) 4677 cost += alpha_memory_latency-1; 4678 4679 /* Everything else handled in DFA bypasses now. */ 4680 4681 return cost; 4682 } 4683 4684 /* The number of instructions that can be issued per cycle. */ 4685 4686 static int 4687 alpha_issue_rate (void) 4688 { 4689 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4); 4690 } 4691 4692 /* How many alternative schedules to try. This should be as wide as the 4693 scheduling freedom in the DFA, but no wider. Making this value too 4694 large results extra work for the scheduler. 4695 4696 For EV4, loads can be issued to either IB0 or IB1, thus we have 2 4697 alternative schedules. For EV5, we can choose between E0/E1 and 4698 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */ 4699 4700 static int 4701 alpha_multipass_dfa_lookahead (void) 4702 { 4703 return (alpha_tune == PROCESSOR_EV6 ? 4 : 2); 4704 } 4705 4706 /* Machine-specific function data. */ 4707 4708 struct GTY(()) alpha_links; 4709 4710 struct GTY(()) machine_function 4711 { 4712 /* For OSF. */ 4713 const char *some_ld_name; 4714 4715 /* For flag_reorder_blocks_and_partition. */ 4716 rtx gp_save_rtx; 4717 4718 /* For VMS condition handlers. */ 4719 bool uses_condition_handler; 4720 4721 /* Linkage entries. */ 4722 splay_tree GTY ((param1_is (char *), param2_is (struct alpha_links *))) 4723 links; 4724 }; 4725 4726 /* How to allocate a 'struct machine_function'. */ 4727 4728 static struct machine_function * 4729 alpha_init_machine_status (void) 4730 { 4731 return ggc_alloc_cleared_machine_function (); 4732 } 4733 4734 /* Support for frame based VMS condition handlers. */ 4735 4736 /* A VMS condition handler may be established for a function with a call to 4737 __builtin_establish_vms_condition_handler, and cancelled with a call to 4738 __builtin_revert_vms_condition_handler. 4739 4740 The VMS Condition Handling Facility knows about the existence of a handler 4741 from the procedure descriptor .handler field. As the VMS native compilers, 4742 we store the user specified handler's address at a fixed location in the 4743 stack frame and point the procedure descriptor at a common wrapper which 4744 fetches the real handler's address and issues an indirect call. 4745 4746 The indirection wrapper is "__gcc_shell_handler", provided by libgcc. 4747 4748 We force the procedure kind to PT_STACK, and the fixed frame location is 4749 fp+8, just before the register save area. We use the handler_data field in 4750 the procedure descriptor to state the fp offset at which the installed 4751 handler address can be found. */ 4752 4753 #define VMS_COND_HANDLER_FP_OFFSET 8 4754 4755 /* Expand code to store the currently installed user VMS condition handler 4756 into TARGET and install HANDLER as the new condition handler. 
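   A rough user-level usage sketch; the prototype is assumed from the
   expansion below (the handler is passed as a pointer and the previously
   installed handler, which ends up in TARGET, is returned) rather than
   quoted from the manual, and my_handler is a hypothetical handler
   routine:

	void *prev
	  = __builtin_establish_vms_condition_handler ((void *) my_handler);

   The revert builtin handled just below removes the handler again.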
*/ 4757 4758 void 4759 alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler) 4760 { 4761 rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx, 4762 VMS_COND_HANDLER_FP_OFFSET); 4763 4764 rtx handler_slot 4765 = gen_rtx_MEM (DImode, handler_slot_address); 4766 4767 emit_move_insn (target, handler_slot); 4768 emit_move_insn (handler_slot, handler); 4769 4770 /* Notify the start/prologue/epilogue emitters that the condition handler 4771 slot is needed. In addition to reserving the slot space, this will force 4772 the procedure kind to PT_STACK so ensure that the hard_frame_pointer_rtx 4773 use above is correct. */ 4774 cfun->machine->uses_condition_handler = true; 4775 } 4776 4777 /* Expand code to store the current VMS condition handler into TARGET and 4778 nullify it. */ 4779 4780 void 4781 alpha_expand_builtin_revert_vms_condition_handler (rtx target) 4782 { 4783 /* We implement this by establishing a null condition handler, with the tiny 4784 side effect of setting uses_condition_handler. This is a little bit 4785 pessimistic if no actual builtin_establish call is ever issued, which is 4786 not a real problem and expected never to happen anyway. */ 4787 4788 alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx); 4789 } 4790 4791 /* Functions to save and restore alpha_return_addr_rtx. */ 4792 4793 /* Start the ball rolling with RETURN_ADDR_RTX. */ 4794 4795 rtx 4796 alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) 4797 { 4798 if (count != 0) 4799 return const0_rtx; 4800 4801 return get_hard_reg_initial_val (Pmode, REG_RA); 4802 } 4803 4804 /* Return or create a memory slot containing the gp value for the current 4805 function. Needed only if TARGET_LD_BUGGY_LDGP. */ 4806 4807 rtx 4808 alpha_gp_save_rtx (void) 4809 { 4810 rtx seq, m = cfun->machine->gp_save_rtx; 4811 4812 if (m == NULL) 4813 { 4814 start_sequence (); 4815 4816 m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD); 4817 m = validize_mem (m); 4818 emit_move_insn (m, pic_offset_table_rtx); 4819 4820 seq = get_insns (); 4821 end_sequence (); 4822 4823 /* We used to simply emit the sequence after entry_of_function. 4824 However this breaks the CFG if the first instruction in the 4825 first block is not the NOTE_INSN_BASIC_BLOCK, for example a 4826 label. Emit the sequence properly on the edge. We are only 4827 invoked from dw2_build_landing_pads and finish_eh_generation 4828 will call commit_edge_insertions thanks to a kludge. */ 4829 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR)); 4830 4831 cfun->machine->gp_save_rtx = m; 4832 } 4833 4834 return m; 4835 } 4836 4837 static void 4838 alpha_instantiate_decls (void) 4839 { 4840 if (cfun->machine->gp_save_rtx != NULL_RTX) 4841 instantiate_decl_rtl (cfun->machine->gp_save_rtx); 4842 } 4843 4844 static int 4845 alpha_ra_ever_killed (void) 4846 { 4847 rtx top; 4848 4849 if (!has_hard_reg_initial_val (Pmode, REG_RA)) 4850 return (int)df_regs_ever_live_p (REG_RA); 4851 4852 push_topmost_sequence (); 4853 top = get_insns (); 4854 pop_topmost_sequence (); 4855 4856 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX); 4857 } 4858 4859 4860 /* Return the trap mode suffix applicable to the current 4861 instruction, or NULL. 
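   This is the text the %/ operand code below glues onto FP mnemonics;
   e.g. an addt whose pattern allows the u/su/sui qualifiers should be
   printed as addt/su under -mfp-trap-mode=su and as addt/sui under
   -mfp-trap-mode=sui.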
*/ 4862 4863 static const char * 4864 get_trap_mode_suffix (void) 4865 { 4866 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn); 4867 4868 switch (s) 4869 { 4870 case TRAP_SUFFIX_NONE: 4871 return NULL; 4872 4873 case TRAP_SUFFIX_SU: 4874 if (alpha_fptm >= ALPHA_FPTM_SU) 4875 return "su"; 4876 return NULL; 4877 4878 case TRAP_SUFFIX_SUI: 4879 if (alpha_fptm >= ALPHA_FPTM_SUI) 4880 return "sui"; 4881 return NULL; 4882 4883 case TRAP_SUFFIX_V_SV: 4884 switch (alpha_fptm) 4885 { 4886 case ALPHA_FPTM_N: 4887 return NULL; 4888 case ALPHA_FPTM_U: 4889 return "v"; 4890 case ALPHA_FPTM_SU: 4891 case ALPHA_FPTM_SUI: 4892 return "sv"; 4893 default: 4894 gcc_unreachable (); 4895 } 4896 4897 case TRAP_SUFFIX_V_SV_SVI: 4898 switch (alpha_fptm) 4899 { 4900 case ALPHA_FPTM_N: 4901 return NULL; 4902 case ALPHA_FPTM_U: 4903 return "v"; 4904 case ALPHA_FPTM_SU: 4905 return "sv"; 4906 case ALPHA_FPTM_SUI: 4907 return "svi"; 4908 default: 4909 gcc_unreachable (); 4910 } 4911 break; 4912 4913 case TRAP_SUFFIX_U_SU_SUI: 4914 switch (alpha_fptm) 4915 { 4916 case ALPHA_FPTM_N: 4917 return NULL; 4918 case ALPHA_FPTM_U: 4919 return "u"; 4920 case ALPHA_FPTM_SU: 4921 return "su"; 4922 case ALPHA_FPTM_SUI: 4923 return "sui"; 4924 default: 4925 gcc_unreachable (); 4926 } 4927 break; 4928 4929 default: 4930 gcc_unreachable (); 4931 } 4932 gcc_unreachable (); 4933 } 4934 4935 /* Return the rounding mode suffix applicable to the current 4936 instruction, or NULL. */ 4937 4938 static const char * 4939 get_round_mode_suffix (void) 4940 { 4941 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn); 4942 4943 switch (s) 4944 { 4945 case ROUND_SUFFIX_NONE: 4946 return NULL; 4947 case ROUND_SUFFIX_NORMAL: 4948 switch (alpha_fprm) 4949 { 4950 case ALPHA_FPRM_NORM: 4951 return NULL; 4952 case ALPHA_FPRM_MINF: 4953 return "m"; 4954 case ALPHA_FPRM_CHOP: 4955 return "c"; 4956 case ALPHA_FPRM_DYN: 4957 return "d"; 4958 default: 4959 gcc_unreachable (); 4960 } 4961 break; 4962 4963 case ROUND_SUFFIX_C: 4964 return "c"; 4965 4966 default: 4967 gcc_unreachable (); 4968 } 4969 gcc_unreachable (); 4970 } 4971 4972 /* Locate some local-dynamic symbol still in use by this function 4973 so that we can print its name in some movdi_er_tlsldm pattern. */ 4974 4975 static int 4976 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) 4977 { 4978 rtx x = *px; 4979 4980 if (GET_CODE (x) == SYMBOL_REF 4981 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) 4982 { 4983 cfun->machine->some_ld_name = XSTR (x, 0); 4984 return 1; 4985 } 4986 4987 return 0; 4988 } 4989 4990 static const char * 4991 get_some_local_dynamic_name (void) 4992 { 4993 rtx insn; 4994 4995 if (cfun->machine->some_ld_name) 4996 return cfun->machine->some_ld_name; 4997 4998 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) 4999 if (INSN_P (insn) 5000 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) 5001 return cfun->machine->some_ld_name; 5002 5003 gcc_unreachable (); 5004 } 5005 5006 /* Print an operand. Recognize special options, documented below. */ 5007 5008 void 5009 print_operand (FILE *file, rtx x, int code) 5010 { 5011 int i; 5012 5013 switch (code) 5014 { 5015 case '~': 5016 /* Print the assembler name of the current function. 
*/ 5017 assemble_name (file, alpha_fnname); 5018 break; 5019 5020 case '&': 5021 assemble_name (file, get_some_local_dynamic_name ()); 5022 break; 5023 5024 case '/': 5025 { 5026 const char *trap = get_trap_mode_suffix (); 5027 const char *round = get_round_mode_suffix (); 5028 5029 if (trap || round) 5030 fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : "")); 5031 break; 5032 } 5033 5034 case ',': 5035 /* Generates single precision instruction suffix. */ 5036 fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file); 5037 break; 5038 5039 case '-': 5040 /* Generates double precision instruction suffix. */ 5041 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file); 5042 break; 5043 5044 case '#': 5045 if (alpha_this_literal_sequence_number == 0) 5046 alpha_this_literal_sequence_number = alpha_next_sequence_number++; 5047 fprintf (file, "%d", alpha_this_literal_sequence_number); 5048 break; 5049 5050 case '*': 5051 if (alpha_this_gpdisp_sequence_number == 0) 5052 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++; 5053 fprintf (file, "%d", alpha_this_gpdisp_sequence_number); 5054 break; 5055 5056 case 'H': 5057 if (GET_CODE (x) == HIGH) 5058 output_addr_const (file, XEXP (x, 0)); 5059 else 5060 output_operand_lossage ("invalid %%H value"); 5061 break; 5062 5063 case 'J': 5064 { 5065 const char *lituse; 5066 5067 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL) 5068 { 5069 x = XVECEXP (x, 0, 0); 5070 lituse = "lituse_tlsgd"; 5071 } 5072 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL) 5073 { 5074 x = XVECEXP (x, 0, 0); 5075 lituse = "lituse_tlsldm"; 5076 } 5077 else if (CONST_INT_P (x)) 5078 lituse = "lituse_jsr"; 5079 else 5080 { 5081 output_operand_lossage ("invalid %%J value"); 5082 break; 5083 } 5084 5085 if (x != const0_rtx) 5086 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); 5087 } 5088 break; 5089 5090 case 'j': 5091 { 5092 const char *lituse; 5093 5094 #ifdef HAVE_AS_JSRDIRECT_RELOCS 5095 lituse = "lituse_jsrdirect"; 5096 #else 5097 lituse = "lituse_jsr"; 5098 #endif 5099 5100 gcc_assert (INTVAL (x) != 0); 5101 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); 5102 } 5103 break; 5104 case 'r': 5105 /* If this operand is the constant zero, write it as "$31". */ 5106 if (REG_P (x)) 5107 fprintf (file, "%s", reg_names[REGNO (x)]); 5108 else if (x == CONST0_RTX (GET_MODE (x))) 5109 fprintf (file, "$31"); 5110 else 5111 output_operand_lossage ("invalid %%r value"); 5112 break; 5113 5114 case 'R': 5115 /* Similar, but for floating-point. */ 5116 if (REG_P (x)) 5117 fprintf (file, "%s", reg_names[REGNO (x)]); 5118 else if (x == CONST0_RTX (GET_MODE (x))) 5119 fprintf (file, "$f31"); 5120 else 5121 output_operand_lossage ("invalid %%R value"); 5122 break; 5123 5124 case 'N': 5125 /* Write the 1's complement of a constant. */ 5126 if (!CONST_INT_P (x)) 5127 output_operand_lossage ("invalid %%N value"); 5128 5129 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x)); 5130 break; 5131 5132 case 'P': 5133 /* Write 1 << C, for a constant C. */ 5134 if (!CONST_INT_P (x)) 5135 output_operand_lossage ("invalid %%P value"); 5136 5137 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x)); 5138 break; 5139 5140 case 'h': 5141 /* Write the high-order 16 bits of a constant, sign-extended. */ 5142 if (!CONST_INT_P (x)) 5143 output_operand_lossage ("invalid %%h value"); 5144 5145 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16); 5146 break; 5147 5148 case 'L': 5149 /* Write the low-order 16 bits of a constant, sign-extended. 
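   E.g. 0x7654 prints as 30292, while 0x8654 prints as -31148, since the
   low half-word is sign-extended; %h above supplies the matching
   high-order part.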
*/ 5150 if (!CONST_INT_P (x)) 5151 output_operand_lossage ("invalid %%L value"); 5152 5153 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 5154 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000)); 5155 break; 5156 5157 case 'm': 5158 /* Write mask for ZAP insn. */ 5159 if (GET_CODE (x) == CONST_DOUBLE) 5160 { 5161 HOST_WIDE_INT mask = 0; 5162 HOST_WIDE_INT value; 5163 5164 value = CONST_DOUBLE_LOW (x); 5165 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; 5166 i++, value >>= 8) 5167 if (value & 0xff) 5168 mask |= (1 << i); 5169 5170 value = CONST_DOUBLE_HIGH (x); 5171 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; 5172 i++, value >>= 8) 5173 if (value & 0xff) 5174 mask |= (1 << (i + sizeof (int))); 5175 5176 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff); 5177 } 5178 5179 else if (CONST_INT_P (x)) 5180 { 5181 HOST_WIDE_INT mask = 0, value = INTVAL (x); 5182 5183 for (i = 0; i < 8; i++, value >>= 8) 5184 if (value & 0xff) 5185 mask |= (1 << i); 5186 5187 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask); 5188 } 5189 else 5190 output_operand_lossage ("invalid %%m value"); 5191 break; 5192 5193 case 'M': 5194 /* 'b', 'w', 'l', or 'q' as the value of the constant. */ 5195 if (!CONST_INT_P (x) 5196 || (INTVAL (x) != 8 && INTVAL (x) != 16 5197 && INTVAL (x) != 32 && INTVAL (x) != 64)) 5198 output_operand_lossage ("invalid %%M value"); 5199 5200 fprintf (file, "%s", 5201 (INTVAL (x) == 8 ? "b" 5202 : INTVAL (x) == 16 ? "w" 5203 : INTVAL (x) == 32 ? "l" 5204 : "q")); 5205 break; 5206 5207 case 'U': 5208 /* Similar, except do it from the mask. */ 5209 if (CONST_INT_P (x)) 5210 { 5211 HOST_WIDE_INT value = INTVAL (x); 5212 5213 if (value == 0xff) 5214 { 5215 fputc ('b', file); 5216 break; 5217 } 5218 if (value == 0xffff) 5219 { 5220 fputc ('w', file); 5221 break; 5222 } 5223 if (value == 0xffffffff) 5224 { 5225 fputc ('l', file); 5226 break; 5227 } 5228 if (value == -1) 5229 { 5230 fputc ('q', file); 5231 break; 5232 } 5233 } 5234 else if (HOST_BITS_PER_WIDE_INT == 32 5235 && GET_CODE (x) == CONST_DOUBLE 5236 && CONST_DOUBLE_LOW (x) == 0xffffffff 5237 && CONST_DOUBLE_HIGH (x) == 0) 5238 { 5239 fputc ('l', file); 5240 break; 5241 } 5242 output_operand_lossage ("invalid %%U value"); 5243 break; 5244 5245 case 's': 5246 /* Write the constant value divided by 8. */ 5247 if (!CONST_INT_P (x) 5248 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64 5249 || (INTVAL (x) & 7) != 0) 5250 output_operand_lossage ("invalid %%s value"); 5251 5252 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8); 5253 break; 5254 5255 case 'S': 5256 /* Same, except compute (64 - c) / 8 */ 5257 5258 if (!CONST_INT_P (x) 5259 && (unsigned HOST_WIDE_INT) INTVAL (x) >= 64 5260 && (INTVAL (x) & 7) != 8) 5261 output_operand_lossage ("invalid %%s value"); 5262 5263 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8); 5264 break; 5265 5266 case 'C': case 'D': case 'c': case 'd': 5267 /* Write out comparison name. 
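   E.g. for (le A B): %C prints le, %D prints gt (reversed), %c prints ge
   (swapped), and %d prints lt (swapped and reversed); the unsigned and
   unordered cases get the ule/ult/un spellings handled specially below.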
*/ 5268 { 5269 enum rtx_code c = GET_CODE (x); 5270 5271 if (!COMPARISON_P (x)) 5272 output_operand_lossage ("invalid %%C value"); 5273 5274 else if (code == 'D') 5275 c = reverse_condition (c); 5276 else if (code == 'c') 5277 c = swap_condition (c); 5278 else if (code == 'd') 5279 c = swap_condition (reverse_condition (c)); 5280 5281 if (c == LEU) 5282 fprintf (file, "ule"); 5283 else if (c == LTU) 5284 fprintf (file, "ult"); 5285 else if (c == UNORDERED) 5286 fprintf (file, "un"); 5287 else 5288 fprintf (file, "%s", GET_RTX_NAME (c)); 5289 } 5290 break; 5291 5292 case 'E': 5293 /* Write the divide or modulus operator. */ 5294 switch (GET_CODE (x)) 5295 { 5296 case DIV: 5297 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q"); 5298 break; 5299 case UDIV: 5300 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q"); 5301 break; 5302 case MOD: 5303 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q"); 5304 break; 5305 case UMOD: 5306 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q"); 5307 break; 5308 default: 5309 output_operand_lossage ("invalid %%E value"); 5310 break; 5311 } 5312 break; 5313 5314 case 'A': 5315 /* Write "_u" for unaligned access. */ 5316 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) 5317 fprintf (file, "_u"); 5318 break; 5319 5320 case 0: 5321 if (REG_P (x)) 5322 fprintf (file, "%s", reg_names[REGNO (x)]); 5323 else if (MEM_P (x)) 5324 output_address (XEXP (x, 0)); 5325 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC) 5326 { 5327 switch (XINT (XEXP (x, 0), 1)) 5328 { 5329 case UNSPEC_DTPREL: 5330 case UNSPEC_TPREL: 5331 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0)); 5332 break; 5333 default: 5334 output_operand_lossage ("unknown relocation unspec"); 5335 break; 5336 } 5337 } 5338 else 5339 output_addr_const (file, x); 5340 break; 5341 5342 default: 5343 output_operand_lossage ("invalid %%xn code"); 5344 } 5345 } 5346 5347 void 5348 print_operand_address (FILE *file, rtx addr) 5349 { 5350 int basereg = 31; 5351 HOST_WIDE_INT offset = 0; 5352 5353 if (GET_CODE (addr) == AND) 5354 addr = XEXP (addr, 0); 5355 5356 if (GET_CODE (addr) == PLUS 5357 && CONST_INT_P (XEXP (addr, 1))) 5358 { 5359 offset = INTVAL (XEXP (addr, 1)); 5360 addr = XEXP (addr, 0); 5361 } 5362 5363 if (GET_CODE (addr) == LO_SUM) 5364 { 5365 const char *reloc16, *reloclo; 5366 rtx op1 = XEXP (addr, 1); 5367 5368 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC) 5369 { 5370 op1 = XEXP (op1, 0); 5371 switch (XINT (op1, 1)) 5372 { 5373 case UNSPEC_DTPREL: 5374 reloc16 = NULL; 5375 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello"); 5376 break; 5377 case UNSPEC_TPREL: 5378 reloc16 = NULL; 5379 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello"); 5380 break; 5381 default: 5382 output_operand_lossage ("unknown relocation unspec"); 5383 return; 5384 } 5385 5386 output_addr_const (file, XVECEXP (op1, 0, 0)); 5387 } 5388 else 5389 { 5390 reloc16 = "gprel"; 5391 reloclo = "gprellow"; 5392 output_addr_const (file, op1); 5393 } 5394 5395 if (offset) 5396 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); 5397 5398 addr = XEXP (addr, 0); 5399 switch (GET_CODE (addr)) 5400 { 5401 case REG: 5402 basereg = REGNO (addr); 5403 break; 5404 5405 case SUBREG: 5406 basereg = subreg_regno (addr); 5407 break; 5408 5409 default: 5410 gcc_unreachable (); 5411 } 5412 5413 fprintf (file, "($%d)\t\t!%s", basereg, 5414 (basereg == 29 ? 
reloc16 : reloclo)); 5415 return; 5416 } 5417 5418 switch (GET_CODE (addr)) 5419 { 5420 case REG: 5421 basereg = REGNO (addr); 5422 break; 5423 5424 case SUBREG: 5425 basereg = subreg_regno (addr); 5426 break; 5427 5428 case CONST_INT: 5429 offset = INTVAL (addr); 5430 break; 5431 5432 #if TARGET_ABI_OPEN_VMS 5433 case SYMBOL_REF: 5434 fprintf (file, "%s", XSTR (addr, 0)); 5435 return; 5436 5437 case CONST: 5438 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS 5439 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF); 5440 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC, 5441 XSTR (XEXP (XEXP (addr, 0), 0), 0), 5442 INTVAL (XEXP (XEXP (addr, 0), 1))); 5443 return; 5444 5445 #endif 5446 default: 5447 gcc_unreachable (); 5448 } 5449 5450 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg); 5451 } 5452 5453 /* Emit RTL insns to initialize the variable parts of a trampoline at 5454 M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx 5455 for the static chain value for the function. */ 5456 5457 static void 5458 alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) 5459 { 5460 rtx fnaddr, mem, word1, word2; 5461 5462 fnaddr = XEXP (DECL_RTL (fndecl), 0); 5463 5464 #ifdef POINTERS_EXTEND_UNSIGNED 5465 fnaddr = convert_memory_address (Pmode, fnaddr); 5466 chain_value = convert_memory_address (Pmode, chain_value); 5467 #endif 5468 5469 if (TARGET_ABI_OPEN_VMS) 5470 { 5471 const char *fnname; 5472 char *trname; 5473 5474 /* Construct the name of the trampoline entry point. */ 5475 fnname = XSTR (fnaddr, 0); 5476 trname = (char *) alloca (strlen (fnname) + 5); 5477 strcpy (trname, fnname); 5478 strcat (trname, "..tr"); 5479 fnname = ggc_alloc_string (trname, strlen (trname) + 1); 5480 word2 = gen_rtx_SYMBOL_REF (Pmode, fnname); 5481 5482 /* Trampoline (or "bounded") procedure descriptor is constructed from 5483 the function's procedure descriptor with certain fields zeroed IAW 5484 the VMS calling standard. This is stored in the first quadword. */ 5485 word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr)); 5486 word1 = expand_and (DImode, word1, 5487 GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)), 5488 NULL); 5489 } 5490 else 5491 { 5492 /* These 4 instructions are: 5493 ldq $1,24($27) 5494 ldq $27,16($27) 5495 jmp $31,($27),0 5496 nop 5497 We don't bother setting the HINT field of the jump; the nop 5498 is merely there for padding. */ 5499 word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018)); 5500 word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000)); 5501 } 5502 5503 /* Store the first two words, as computed above. */ 5504 mem = adjust_address (m_tramp, DImode, 0); 5505 emit_move_insn (mem, word1); 5506 mem = adjust_address (m_tramp, DImode, 8); 5507 emit_move_insn (mem, word2); 5508 5509 /* Store function address and static chain value. */ 5510 mem = adjust_address (m_tramp, Pmode, 16); 5511 emit_move_insn (mem, fnaddr); 5512 mem = adjust_address (m_tramp, Pmode, 24); 5513 emit_move_insn (mem, chain_value); 5514 5515 if (TARGET_ABI_OSF) 5516 { 5517 emit_insn (gen_imb ()); 5518 #ifdef HAVE_ENABLE_EXECUTE_STACK 5519 emit_library_call (init_one_libfunc ("__enable_execute_stack"), 5520 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); 5521 #endif 5522 } 5523 } 5524 5525 /* Determine where to put an argument to a function. 5526 Value is zero to push the argument on the stack, 5527 or a hard register in which to store the argument. 5528 5529 MODE is the argument's machine mode. 5530 TYPE is the data type of the argument (as a tree). 
5531 This is null for libcalls where that information may 5532 not be available. 5533 CUM is a variable of type CUMULATIVE_ARGS which gives info about 5534 the preceding args and about the function being called. 5535 NAMED is nonzero if this argument is a named parameter 5536 (otherwise it is an extra parameter matching an ellipsis). 5537 5538 On Alpha the first 6 words of args are normally in registers 5539 and the rest are pushed. */ 5540 5541 static rtx 5542 alpha_function_arg (cumulative_args_t cum_v, enum machine_mode mode, 5543 const_tree type, bool named ATTRIBUTE_UNUSED) 5544 { 5545 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 5546 int basereg; 5547 int num_args; 5548 5549 /* Don't get confused and pass small structures in FP registers. */ 5550 if (type && AGGREGATE_TYPE_P (type)) 5551 basereg = 16; 5552 else 5553 { 5554 #ifdef ENABLE_CHECKING 5555 /* With alpha_split_complex_arg, we shouldn't see any raw complex 5556 values here. */ 5557 gcc_assert (!COMPLEX_MODE_P (mode)); 5558 #endif 5559 5560 /* Set up defaults for FP operands passed in FP registers, and 5561 integral operands passed in integer registers. */ 5562 if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT) 5563 basereg = 32 + 16; 5564 else 5565 basereg = 16; 5566 } 5567 5568 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for 5569 the two platforms, so we can't avoid conditional compilation. */ 5570 #if TARGET_ABI_OPEN_VMS 5571 { 5572 if (mode == VOIDmode) 5573 return alpha_arg_info_reg_val (*cum); 5574 5575 num_args = cum->num_args; 5576 if (num_args >= 6 5577 || targetm.calls.must_pass_in_stack (mode, type)) 5578 return NULL_RTX; 5579 } 5580 #elif TARGET_ABI_OSF 5581 { 5582 if (*cum >= 6) 5583 return NULL_RTX; 5584 num_args = *cum; 5585 5586 /* VOID is passed as a special flag for "last argument". */ 5587 if (type == void_type_node) 5588 basereg = 16; 5589 else if (targetm.calls.must_pass_in_stack (mode, type)) 5590 return NULL_RTX; 5591 } 5592 #else 5593 #error Unhandled ABI 5594 #endif 5595 5596 return gen_rtx_REG (mode, num_args + basereg); 5597 } 5598 5599 /* Update the data in CUM to advance over an argument 5600 of mode MODE and data type TYPE. 5601 (TYPE is null for libcalls where that information may not be available.) */ 5602 5603 static void 5604 alpha_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, 5605 const_tree type, bool named ATTRIBUTE_UNUSED) 5606 { 5607 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 5608 bool onstack = targetm.calls.must_pass_in_stack (mode, type); 5609 int increment = onstack ? 
6 : ALPHA_ARG_SIZE (mode, type, named); 5610 5611 #if TARGET_ABI_OSF 5612 *cum += increment; 5613 #else 5614 if (!onstack && cum->num_args < 6) 5615 cum->atypes[cum->num_args] = alpha_arg_type (mode); 5616 cum->num_args += increment; 5617 #endif 5618 } 5619 5620 static int 5621 alpha_arg_partial_bytes (cumulative_args_t cum_v, 5622 enum machine_mode mode ATTRIBUTE_UNUSED, 5623 tree type ATTRIBUTE_UNUSED, 5624 bool named ATTRIBUTE_UNUSED) 5625 { 5626 int words = 0; 5627 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v); 5628 5629 #if TARGET_ABI_OPEN_VMS 5630 if (cum->num_args < 6 5631 && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named)) 5632 words = 6 - cum->num_args; 5633 #elif TARGET_ABI_OSF 5634 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named)) 5635 words = 6 - *cum; 5636 #else 5637 #error Unhandled ABI 5638 #endif 5639 5640 return words * UNITS_PER_WORD; 5641 } 5642 5643 5644 /* Return true if TYPE must be returned in memory, instead of in registers. */ 5645 5646 static bool 5647 alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) 5648 { 5649 enum machine_mode mode = VOIDmode; 5650 int size; 5651 5652 if (type) 5653 { 5654 mode = TYPE_MODE (type); 5655 5656 /* All aggregates are returned in memory, except on OpenVMS where 5657 records that fit 64 bits should be returned by immediate value 5658 as required by section 3.8.7.1 of the OpenVMS Calling Standard. */ 5659 if (TARGET_ABI_OPEN_VMS 5660 && TREE_CODE (type) != ARRAY_TYPE 5661 && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8) 5662 return false; 5663 5664 if (AGGREGATE_TYPE_P (type)) 5665 return true; 5666 } 5667 5668 size = GET_MODE_SIZE (mode); 5669 switch (GET_MODE_CLASS (mode)) 5670 { 5671 case MODE_VECTOR_FLOAT: 5672 /* Pass all float vectors in memory, like an aggregate. */ 5673 return true; 5674 5675 case MODE_COMPLEX_FLOAT: 5676 /* We judge complex floats on the size of their element, 5677 not the size of the whole type. */ 5678 size = GET_MODE_UNIT_SIZE (mode); 5679 break; 5680 5681 case MODE_INT: 5682 case MODE_FLOAT: 5683 case MODE_COMPLEX_INT: 5684 case MODE_VECTOR_INT: 5685 break; 5686 5687 default: 5688 /* ??? We get called on all sorts of random stuff from 5689 aggregate_value_p. We must return something, but it's not 5690 clear what's safe to return. Pretend it's a struct I 5691 guess. */ 5692 return true; 5693 } 5694 5695 /* Otherwise types must fit in one register. */ 5696 return size > UNITS_PER_WORD; 5697 } 5698 5699 /* Return true if TYPE should be passed by invisible reference. */ 5700 5701 static bool 5702 alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED, 5703 enum machine_mode mode, 5704 const_tree type ATTRIBUTE_UNUSED, 5705 bool named ATTRIBUTE_UNUSED) 5706 { 5707 return mode == TFmode || mode == TCmode; 5708 } 5709 5710 /* Define how to find the value returned by a function. VALTYPE is the 5711 data type of the value (as a tree). If the precise function being 5712 called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0. 5713 MODE is set instead of VALTYPE for libcalls. 5714 5715 On Alpha the value is found in $0 for integer functions and 5716 $f0 for floating-point functions. 
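   For example, a function returning long lands in $0, one returning
   double in $f0, and one returning double _Complex in the register pair
   $f0/$f1 via the PARALLEL built below; the BLKmode case only arises for
   OpenVMS aggregates returned by value and also uses $0.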
*/ 5717 5718 rtx 5719 function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, 5720 enum machine_mode mode) 5721 { 5722 unsigned int regnum, dummy ATTRIBUTE_UNUSED; 5723 enum mode_class mclass; 5724 5725 gcc_assert (!valtype || !alpha_return_in_memory (valtype, func)); 5726 5727 if (valtype) 5728 mode = TYPE_MODE (valtype); 5729 5730 mclass = GET_MODE_CLASS (mode); 5731 switch (mclass) 5732 { 5733 case MODE_INT: 5734 /* Do the same thing as PROMOTE_MODE except for libcalls on VMS, 5735 where we have them returning both SImode and DImode. */ 5736 if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype))) 5737 PROMOTE_MODE (mode, dummy, valtype); 5738 /* FALLTHRU */ 5739 5740 case MODE_COMPLEX_INT: 5741 case MODE_VECTOR_INT: 5742 regnum = 0; 5743 break; 5744 5745 case MODE_FLOAT: 5746 regnum = 32; 5747 break; 5748 5749 case MODE_COMPLEX_FLOAT: 5750 { 5751 enum machine_mode cmode = GET_MODE_INNER (mode); 5752 5753 return gen_rtx_PARALLEL 5754 (VOIDmode, 5755 gen_rtvec (2, 5756 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32), 5757 const0_rtx), 5758 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33), 5759 GEN_INT (GET_MODE_SIZE (cmode))))); 5760 } 5761 5762 case MODE_RANDOM: 5763 /* We should only reach here for BLKmode on VMS. */ 5764 gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode); 5765 regnum = 0; 5766 break; 5767 5768 default: 5769 gcc_unreachable (); 5770 } 5771 5772 return gen_rtx_REG (mode, regnum); 5773 } 5774 5775 /* TCmode complex values are passed by invisible reference. We 5776 should not split these values. */ 5777 5778 static bool 5779 alpha_split_complex_arg (const_tree type) 5780 { 5781 return TYPE_MODE (type) != TCmode; 5782 } 5783 5784 static tree 5785 alpha_build_builtin_va_list (void) 5786 { 5787 tree base, ofs, space, record, type_decl; 5788 5789 if (TARGET_ABI_OPEN_VMS) 5790 return ptr_type_node; 5791 5792 record = (*lang_hooks.types.make_type) (RECORD_TYPE); 5793 type_decl = build_decl (BUILTINS_LOCATION, 5794 TYPE_DECL, get_identifier ("__va_list_tag"), record); 5795 TYPE_STUB_DECL (record) = type_decl; 5796 TYPE_NAME (record) = type_decl; 5797 5798 /* C++? SET_IS_AGGR_TYPE (record, 1); */ 5799 5800 /* Dummy field to prevent alignment warnings. */ 5801 space = build_decl (BUILTINS_LOCATION, 5802 FIELD_DECL, NULL_TREE, integer_type_node); 5803 DECL_FIELD_CONTEXT (space) = record; 5804 DECL_ARTIFICIAL (space) = 1; 5805 DECL_IGNORED_P (space) = 1; 5806 5807 ofs = build_decl (BUILTINS_LOCATION, 5808 FIELD_DECL, get_identifier ("__offset"), 5809 integer_type_node); 5810 DECL_FIELD_CONTEXT (ofs) = record; 5811 DECL_CHAIN (ofs) = space; 5812 /* ??? This is a hack, __offset is marked volatile to prevent 5813 DCE that confuses stdarg optimization and results in 5814 gcc.c-torture/execute/stdarg-1.c failure. See PR 41089. */ 5815 TREE_THIS_VOLATILE (ofs) = 1; 5816 5817 base = build_decl (BUILTINS_LOCATION, 5818 FIELD_DECL, get_identifier ("__base"), 5819 ptr_type_node); 5820 DECL_FIELD_CONTEXT (base) = record; 5821 DECL_CHAIN (base) = ofs; 5822 5823 TYPE_FIELDS (record) = base; 5824 layout_type (record); 5825 5826 va_list_gpr_counter_field = ofs; 5827 return record; 5828 } 5829 5830 #if TARGET_ABI_OSF 5831 /* Helper function for alpha_stdarg_optimize_hook. Skip over casts 5832 and constant additions. 
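   Roughly: given a GIMPLE chain such as

	off_1 = ap.__offset;
	t_2 = (sizetype) off_1;
	t_3 = t_2 + 8;

   calling this on t_3 walks back through the conversion and the constant
   addition and returns the statement loading ap.__offset (the SSA names
   above are made up for illustration).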
*/ 5833 5834 static gimple 5835 va_list_skip_additions (tree lhs) 5836 { 5837 gimple stmt; 5838 5839 for (;;) 5840 { 5841 enum tree_code code; 5842 5843 stmt = SSA_NAME_DEF_STMT (lhs); 5844 5845 if (gimple_code (stmt) == GIMPLE_PHI) 5846 return stmt; 5847 5848 if (!is_gimple_assign (stmt) 5849 || gimple_assign_lhs (stmt) != lhs) 5850 return NULL; 5851 5852 if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME) 5853 return stmt; 5854 code = gimple_assign_rhs_code (stmt); 5855 if (!CONVERT_EXPR_CODE_P (code) 5856 && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR) 5857 || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST 5858 || !host_integerp (gimple_assign_rhs2 (stmt), 1))) 5859 return stmt; 5860 5861 lhs = gimple_assign_rhs1 (stmt); 5862 } 5863 } 5864 5865 /* Check if LHS = RHS statement is 5866 LHS = *(ap.__base + ap.__offset + cst) 5867 or 5868 LHS = *(ap.__base 5869 + ((ap.__offset + cst <= 47) 5870 ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2). 5871 If the former, indicate that GPR registers are needed, 5872 if the latter, indicate that FPR registers are needed. 5873 5874 Also look for LHS = (*ptr).field, where ptr is one of the forms 5875 listed above. 5876 5877 On alpha, cfun->va_list_gpr_size is used as size of the needed 5878 regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR 5879 registers are needed and bit 1 set if FPR registers are needed. 5880 Return true if va_list references should not be scanned for the 5881 current statement. */ 5882 5883 static bool 5884 alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt) 5885 { 5886 tree base, offset, rhs; 5887 int offset_arg = 1; 5888 gimple base_stmt; 5889 5890 if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) 5891 != GIMPLE_SINGLE_RHS) 5892 return false; 5893 5894 rhs = gimple_assign_rhs1 (stmt); 5895 while (handled_component_p (rhs)) 5896 rhs = TREE_OPERAND (rhs, 0); 5897 if (TREE_CODE (rhs) != MEM_REF 5898 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME) 5899 return false; 5900 5901 stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0)); 5902 if (stmt == NULL 5903 || !is_gimple_assign (stmt) 5904 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR) 5905 return false; 5906 5907 base = gimple_assign_rhs1 (stmt); 5908 if (TREE_CODE (base) == SSA_NAME) 5909 { 5910 base_stmt = va_list_skip_additions (base); 5911 if (base_stmt 5912 && is_gimple_assign (base_stmt) 5913 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) 5914 base = gimple_assign_rhs1 (base_stmt); 5915 } 5916 5917 if (TREE_CODE (base) != COMPONENT_REF 5918 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) 5919 { 5920 base = gimple_assign_rhs2 (stmt); 5921 if (TREE_CODE (base) == SSA_NAME) 5922 { 5923 base_stmt = va_list_skip_additions (base); 5924 if (base_stmt 5925 && is_gimple_assign (base_stmt) 5926 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) 5927 base = gimple_assign_rhs1 (base_stmt); 5928 } 5929 5930 if (TREE_CODE (base) != COMPONENT_REF 5931 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) 5932 return false; 5933 5934 offset_arg = 0; 5935 } 5936 5937 base = get_base_address (base); 5938 if (TREE_CODE (base) != VAR_DECL 5939 || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names)) 5940 return false; 5941 5942 offset = gimple_op (stmt, 1 + offset_arg); 5943 if (TREE_CODE (offset) == SSA_NAME) 5944 { 5945 gimple offset_stmt = va_list_skip_additions (offset); 5946 5947 if (offset_stmt 5948 && gimple_code (offset_stmt) == GIMPLE_PHI) 5949 { 5950 HOST_WIDE_INT sub; 
5951 gimple arg1_stmt, arg2_stmt; 5952 tree arg1, arg2; 5953 enum tree_code code1, code2; 5954 5955 if (gimple_phi_num_args (offset_stmt) != 2) 5956 goto escapes; 5957 5958 arg1_stmt 5959 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0)); 5960 arg2_stmt 5961 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1)); 5962 if (arg1_stmt == NULL 5963 || !is_gimple_assign (arg1_stmt) 5964 || arg2_stmt == NULL 5965 || !is_gimple_assign (arg2_stmt)) 5966 goto escapes; 5967 5968 code1 = gimple_assign_rhs_code (arg1_stmt); 5969 code2 = gimple_assign_rhs_code (arg2_stmt); 5970 if (code1 == COMPONENT_REF 5971 && (code2 == MINUS_EXPR || code2 == PLUS_EXPR)) 5972 /* Do nothing. */; 5973 else if (code2 == COMPONENT_REF 5974 && (code1 == MINUS_EXPR || code1 == PLUS_EXPR)) 5975 { 5976 gimple tem = arg1_stmt; 5977 code2 = code1; 5978 arg1_stmt = arg2_stmt; 5979 arg2_stmt = tem; 5980 } 5981 else 5982 goto escapes; 5983 5984 if (!host_integerp (gimple_assign_rhs2 (arg2_stmt), 0)) 5985 goto escapes; 5986 5987 sub = tree_low_cst (gimple_assign_rhs2 (arg2_stmt), 0); 5988 if (code2 == MINUS_EXPR) 5989 sub = -sub; 5990 if (sub < -48 || sub > -32) 5991 goto escapes; 5992 5993 arg1 = gimple_assign_rhs1 (arg1_stmt); 5994 arg2 = gimple_assign_rhs1 (arg2_stmt); 5995 if (TREE_CODE (arg2) == SSA_NAME) 5996 { 5997 arg2_stmt = va_list_skip_additions (arg2); 5998 if (arg2_stmt == NULL 5999 || !is_gimple_assign (arg2_stmt) 6000 || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF) 6001 goto escapes; 6002 arg2 = gimple_assign_rhs1 (arg2_stmt); 6003 } 6004 if (arg1 != arg2) 6005 goto escapes; 6006 6007 if (TREE_CODE (arg1) != COMPONENT_REF 6008 || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field 6009 || get_base_address (arg1) != base) 6010 goto escapes; 6011 6012 /* Need floating point regs. */ 6013 cfun->va_list_fpr_size |= 2; 6014 return false; 6015 } 6016 if (offset_stmt 6017 && is_gimple_assign (offset_stmt) 6018 && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF) 6019 offset = gimple_assign_rhs1 (offset_stmt); 6020 } 6021 if (TREE_CODE (offset) != COMPONENT_REF 6022 || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field 6023 || get_base_address (offset) != base) 6024 goto escapes; 6025 else 6026 /* Need general regs. */ 6027 cfun->va_list_fpr_size |= 1; 6028 return false; 6029 6030 escapes: 6031 si->va_list_escapes = true; 6032 return false; 6033 } 6034 #endif 6035 6036 /* Perform any needed actions needed for a function that is receiving a 6037 variable number of arguments. */ 6038 6039 static void 6040 alpha_setup_incoming_varargs (cumulative_args_t pcum, enum machine_mode mode, 6041 tree type, int *pretend_size, int no_rtl) 6042 { 6043 CUMULATIVE_ARGS cum = *get_cumulative_args (pcum); 6044 6045 /* Skip the current argument. */ 6046 targetm.calls.function_arg_advance (pack_cumulative_args (&cum), mode, type, 6047 true); 6048 6049 #if TARGET_ABI_OPEN_VMS 6050 /* For VMS, we allocate space for all 6 arg registers plus a count. 6051 6052 However, if NO registers need to be saved, don't allocate any space. 6053 This is not only because we won't need the space, but because AP 6054 includes the current_pretend_args_size and we don't want to mess up 6055 any ap-relative addresses already made. 
*/ 6056 if (cum.num_args < 6) 6057 { 6058 if (!no_rtl) 6059 { 6060 emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx); 6061 emit_insn (gen_arg_home ()); 6062 } 6063 *pretend_size = 7 * UNITS_PER_WORD; 6064 } 6065 #else 6066 /* On OSF/1 and friends, we allocate space for all 12 arg registers, but 6067 only push those that are remaining. However, if NO registers need to 6068 be saved, don't allocate any space. This is not only because we won't 6069 need the space, but because AP includes the current_pretend_args_size 6070 and we don't want to mess up any ap-relative addresses already made. 6071 6072 If we are not to use the floating-point registers, save the integer 6073 registers where we would put the floating-point registers. This is 6074 not the most efficient way to implement varargs with just one register 6075 class, but it isn't worth doing anything more efficient in this rare 6076 case. */ 6077 if (cum >= 6) 6078 return; 6079 6080 if (!no_rtl) 6081 { 6082 int count; 6083 alias_set_type set = get_varargs_alias_set (); 6084 rtx tmp; 6085 6086 count = cfun->va_list_gpr_size / UNITS_PER_WORD; 6087 if (count > 6 - cum) 6088 count = 6 - cum; 6089 6090 /* Detect whether integer registers or floating-point registers 6091 are needed by the detected va_arg statements. See above for 6092 how these values are computed. Note that the "escape" value 6093 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of 6094 these bits set. */ 6095 gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3); 6096 6097 if (cfun->va_list_fpr_size & 1) 6098 { 6099 tmp = gen_rtx_MEM (BLKmode, 6100 plus_constant (Pmode, virtual_incoming_args_rtx, 6101 (cum + 6) * UNITS_PER_WORD)); 6102 MEM_NOTRAP_P (tmp) = 1; 6103 set_mem_alias_set (tmp, set); 6104 move_block_from_reg (16 + cum, tmp, count); 6105 } 6106 6107 if (cfun->va_list_fpr_size & 2) 6108 { 6109 tmp = gen_rtx_MEM (BLKmode, 6110 plus_constant (Pmode, virtual_incoming_args_rtx, 6111 cum * UNITS_PER_WORD)); 6112 MEM_NOTRAP_P (tmp) = 1; 6113 set_mem_alias_set (tmp, set); 6114 move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count); 6115 } 6116 } 6117 *pretend_size = 12 * UNITS_PER_WORD; 6118 #endif 6119 } 6120 6121 static void 6122 alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) 6123 { 6124 HOST_WIDE_INT offset; 6125 tree t, offset_field, base_field; 6126 6127 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK) 6128 return; 6129 6130 /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base 6131 up by 48, storing fp arg registers in the first 48 bytes, and the 6132 integer arg registers in the next 48 bytes. This is only done, 6133 however, if any integer registers need to be stored. 6134 6135 If no integer registers need be stored, then we must subtract 48 6136 in order to account for the integer arg registers which are counted 6137 in argsize above, but which are not actually stored on the stack. 6138 Must further be careful here about structures straddling the last 6139 integer argument register; that futzes with pretend_args_size, 6140 which changes the meaning of AP. */ 6141 6142 if (NUM_ARGS < 6) 6143 offset = TARGET_ABI_OPEN_VMS ? 
UNITS_PER_WORD : 6 * UNITS_PER_WORD; 6144 else 6145 offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size; 6146 6147 if (TARGET_ABI_OPEN_VMS) 6148 { 6149 t = make_tree (ptr_type_node, virtual_incoming_args_rtx); 6150 t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD); 6151 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t); 6152 TREE_SIDE_EFFECTS (t) = 1; 6153 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6154 } 6155 else 6156 { 6157 base_field = TYPE_FIELDS (TREE_TYPE (valist)); 6158 offset_field = DECL_CHAIN (base_field); 6159 6160 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), 6161 valist, base_field, NULL_TREE); 6162 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), 6163 valist, offset_field, NULL_TREE); 6164 6165 t = make_tree (ptr_type_node, virtual_incoming_args_rtx); 6166 t = fold_build_pointer_plus_hwi (t, offset); 6167 t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t); 6168 TREE_SIDE_EFFECTS (t) = 1; 6169 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6170 6171 t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD); 6172 t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t); 6173 TREE_SIDE_EFFECTS (t) = 1; 6174 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6175 } 6176 } 6177 6178 static tree 6179 alpha_gimplify_va_arg_1 (tree type, tree base, tree offset, 6180 gimple_seq *pre_p) 6181 { 6182 tree type_size, ptr_type, addend, t, addr; 6183 gimple_seq internal_post; 6184 6185 /* If the type could not be passed in registers, skip the block 6186 reserved for the registers. */ 6187 if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type)) 6188 { 6189 t = build_int_cst (TREE_TYPE (offset), 6*8); 6190 gimplify_assign (offset, 6191 build2 (MAX_EXPR, TREE_TYPE (offset), offset, t), 6192 pre_p); 6193 } 6194 6195 addend = offset; 6196 ptr_type = build_pointer_type_for_mode (type, ptr_mode, true); 6197 6198 if (TREE_CODE (type) == COMPLEX_TYPE) 6199 { 6200 tree real_part, imag_part, real_temp; 6201 6202 real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base, 6203 offset, pre_p); 6204 6205 /* Copy the value into a new temporary, lest the formal temporary 6206 be reused out from under us. */ 6207 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL); 6208 6209 imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base, 6210 offset, pre_p); 6211 6212 return build2 (COMPLEX_EXPR, type, real_temp, imag_part); 6213 } 6214 else if (TREE_CODE (type) == REAL_TYPE) 6215 { 6216 tree fpaddend, cond, fourtyeight; 6217 6218 fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8); 6219 fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend), 6220 addend, fourtyeight); 6221 cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight); 6222 addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond, 6223 fpaddend, addend); 6224 } 6225 6226 /* Build the final address and force that value into a temporary. */ 6227 addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend); 6228 internal_post = NULL; 6229 gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue); 6230 gimple_seq_add_seq (pre_p, internal_post); 6231 6232 /* Update the offset field. 
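
     The increment is the argument size rounded up to a multiple of 8 bytes,
     so e.g. a 12-byte structure advances __offset by 16.  A variable-sized
     or overflowing TYPE_SIZE_UNIT leaves the offset unchanged.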
   */
  type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
  if (type_size == NULL || TREE_OVERFLOW (type_size))
    t = size_zero_node;
  else
    {
      t = size_binop (PLUS_EXPR, type_size, size_int (7));
      t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
      t = size_binop (MULT_EXPR, t, size_int (8));
    }
  t = fold_convert (TREE_TYPE (offset), t);
  gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
                   pre_p);

  return build_va_arg_indirect_ref (addr);
}

static tree
alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                       gimple_seq *post_p)
{
  tree offset_field, base_field, offset, base, t, r;
  bool indirect;

  if (TARGET_ABI_OPEN_VMS)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  base_field = TYPE_FIELDS (va_list_type_node);
  offset_field = DECL_CHAIN (base_field);
  base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
                       valist, base_field, NULL_TREE);
  offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
                         valist, offset_field, NULL_TREE);

  /* Pull the fields of the structure out into temporaries.  Since we never
     modify the base field, we can use a formal temporary.  Sign-extend the
     offset field so that it's the proper width for pointer arithmetic.  */
  base = get_formal_tmp_var (base_field, pre_p);

  t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
  offset = get_initialized_tmp_var (t, pre_p, NULL);

  indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect)
    type = build_pointer_type_for_mode (type, ptr_mode, true);

  /* Find the value.  Note that this will be a stable indirection, or
     a composite of stable indirections in the case of complex.  */
  r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);

  /* Stuff the offset temporary back into its field.  */
  gimplify_assign (unshare_expr (offset_field),
                   fold_convert (TREE_TYPE (offset_field), offset), pre_p);

  if (indirect)
    r = build_va_arg_indirect_ref (r);

  return r;
}

/* Builtins.
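
   Each builtin below maps a single Alpha instruction onto a GCC builtin,
   registered via code_for_builtin[]; e.g. __builtin_alpha_umulh (a, b)
   yields the high 64 bits of the unsigned 128-bit product, and
   __builtin_alpha_cmpbge (a, b) produces the byte-wise unsigned >= mask.
   Calls with constant arguments are folded at compile time by
   alpha_fold_builtin further below.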
*/ 6293 6294 enum alpha_builtin 6295 { 6296 ALPHA_BUILTIN_CMPBGE, 6297 ALPHA_BUILTIN_EXTBL, 6298 ALPHA_BUILTIN_EXTWL, 6299 ALPHA_BUILTIN_EXTLL, 6300 ALPHA_BUILTIN_EXTQL, 6301 ALPHA_BUILTIN_EXTWH, 6302 ALPHA_BUILTIN_EXTLH, 6303 ALPHA_BUILTIN_EXTQH, 6304 ALPHA_BUILTIN_INSBL, 6305 ALPHA_BUILTIN_INSWL, 6306 ALPHA_BUILTIN_INSLL, 6307 ALPHA_BUILTIN_INSQL, 6308 ALPHA_BUILTIN_INSWH, 6309 ALPHA_BUILTIN_INSLH, 6310 ALPHA_BUILTIN_INSQH, 6311 ALPHA_BUILTIN_MSKBL, 6312 ALPHA_BUILTIN_MSKWL, 6313 ALPHA_BUILTIN_MSKLL, 6314 ALPHA_BUILTIN_MSKQL, 6315 ALPHA_BUILTIN_MSKWH, 6316 ALPHA_BUILTIN_MSKLH, 6317 ALPHA_BUILTIN_MSKQH, 6318 ALPHA_BUILTIN_UMULH, 6319 ALPHA_BUILTIN_ZAP, 6320 ALPHA_BUILTIN_ZAPNOT, 6321 ALPHA_BUILTIN_AMASK, 6322 ALPHA_BUILTIN_IMPLVER, 6323 ALPHA_BUILTIN_RPCC, 6324 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER, 6325 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 6326 6327 /* TARGET_MAX */ 6328 ALPHA_BUILTIN_MINUB8, 6329 ALPHA_BUILTIN_MINSB8, 6330 ALPHA_BUILTIN_MINUW4, 6331 ALPHA_BUILTIN_MINSW4, 6332 ALPHA_BUILTIN_MAXUB8, 6333 ALPHA_BUILTIN_MAXSB8, 6334 ALPHA_BUILTIN_MAXUW4, 6335 ALPHA_BUILTIN_MAXSW4, 6336 ALPHA_BUILTIN_PERR, 6337 ALPHA_BUILTIN_PKLB, 6338 ALPHA_BUILTIN_PKWB, 6339 ALPHA_BUILTIN_UNPKBL, 6340 ALPHA_BUILTIN_UNPKBW, 6341 6342 /* TARGET_CIX */ 6343 ALPHA_BUILTIN_CTTZ, 6344 ALPHA_BUILTIN_CTLZ, 6345 ALPHA_BUILTIN_CTPOP, 6346 6347 ALPHA_BUILTIN_max 6348 }; 6349 6350 static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = { 6351 CODE_FOR_builtin_cmpbge, 6352 CODE_FOR_extbl, 6353 CODE_FOR_extwl, 6354 CODE_FOR_extll, 6355 CODE_FOR_extql, 6356 CODE_FOR_extwh, 6357 CODE_FOR_extlh, 6358 CODE_FOR_extqh, 6359 CODE_FOR_builtin_insbl, 6360 CODE_FOR_builtin_inswl, 6361 CODE_FOR_builtin_insll, 6362 CODE_FOR_insql, 6363 CODE_FOR_inswh, 6364 CODE_FOR_inslh, 6365 CODE_FOR_insqh, 6366 CODE_FOR_mskbl, 6367 CODE_FOR_mskwl, 6368 CODE_FOR_mskll, 6369 CODE_FOR_mskql, 6370 CODE_FOR_mskwh, 6371 CODE_FOR_msklh, 6372 CODE_FOR_mskqh, 6373 CODE_FOR_umuldi3_highpart, 6374 CODE_FOR_builtin_zap, 6375 CODE_FOR_builtin_zapnot, 6376 CODE_FOR_builtin_amask, 6377 CODE_FOR_builtin_implver, 6378 CODE_FOR_builtin_rpcc, 6379 CODE_FOR_builtin_establish_vms_condition_handler, 6380 CODE_FOR_builtin_revert_vms_condition_handler, 6381 6382 /* TARGET_MAX */ 6383 CODE_FOR_builtin_minub8, 6384 CODE_FOR_builtin_minsb8, 6385 CODE_FOR_builtin_minuw4, 6386 CODE_FOR_builtin_minsw4, 6387 CODE_FOR_builtin_maxub8, 6388 CODE_FOR_builtin_maxsb8, 6389 CODE_FOR_builtin_maxuw4, 6390 CODE_FOR_builtin_maxsw4, 6391 CODE_FOR_builtin_perr, 6392 CODE_FOR_builtin_pklb, 6393 CODE_FOR_builtin_pkwb, 6394 CODE_FOR_builtin_unpkbl, 6395 CODE_FOR_builtin_unpkbw, 6396 6397 /* TARGET_CIX */ 6398 CODE_FOR_ctzdi2, 6399 CODE_FOR_clzdi2, 6400 CODE_FOR_popcountdi2 6401 }; 6402 6403 struct alpha_builtin_def 6404 { 6405 const char *name; 6406 enum alpha_builtin code; 6407 unsigned int target_mask; 6408 bool is_const; 6409 }; 6410 6411 static struct alpha_builtin_def const zero_arg_builtins[] = { 6412 { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true }, 6413 { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false } 6414 }; 6415 6416 static struct alpha_builtin_def const one_arg_builtins[] = { 6417 { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true }, 6418 { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true }, 6419 { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true }, 6420 { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true }, 6421 { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true }, 6422 { 
"__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true }, 6423 { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true }, 6424 { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true } 6425 }; 6426 6427 static struct alpha_builtin_def const two_arg_builtins[] = { 6428 { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true }, 6429 { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true }, 6430 { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true }, 6431 { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true }, 6432 { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true }, 6433 { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true }, 6434 { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true }, 6435 { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true }, 6436 { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true }, 6437 { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true }, 6438 { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true }, 6439 { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true }, 6440 { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true }, 6441 { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true }, 6442 { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true }, 6443 { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true }, 6444 { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true }, 6445 { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true }, 6446 { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true }, 6447 { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true }, 6448 { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true }, 6449 { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true }, 6450 { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true }, 6451 { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true }, 6452 { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true }, 6453 { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true }, 6454 { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true }, 6455 { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true }, 6456 { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true }, 6457 { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true }, 6458 { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true }, 6459 { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true }, 6460 { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true }, 6461 { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true } 6462 }; 6463 6464 static GTY(()) tree alpha_dimode_u; 6465 static GTY(()) tree alpha_v8qi_u; 6466 static GTY(()) tree alpha_v8qi_s; 6467 static GTY(()) tree alpha_v4hi_u; 6468 static GTY(()) tree alpha_v4hi_s; 6469 6470 static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max]; 6471 6472 /* Return the alpha builtin for CODE. */ 6473 6474 static tree 6475 alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 6476 { 6477 if (code >= ALPHA_BUILTIN_max) 6478 return error_mark_node; 6479 return alpha_builtins[code]; 6480 } 6481 6482 /* Helper function of alpha_init_builtins. Add the built-in specified 6483 by NAME, TYPE, CODE, and ECF. 
   */

static void
alpha_builtin_function (const char *name, tree ftype,
                        enum alpha_builtin code, unsigned ecf)
{
  tree decl = add_builtin_function (name, ftype, (int) code,
                                    BUILT_IN_MD, NULL, NULL_TREE);

  if (ecf & ECF_CONST)
    TREE_READONLY (decl) = 1;
  if (ecf & ECF_NOTHROW)
    TREE_NOTHROW (decl) = 1;

  alpha_builtins [(int) code] = decl;
}

/* Helper function of alpha_init_builtins.  Add the COUNT built-in
   functions pointed to by P, with function type FTYPE.  */

static void
alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
                    tree ftype)
{
  size_t i;

  for (i = 0; i < count; ++i, ++p)
    if ((target_flags & p->target_mask) == p->target_mask)
      alpha_builtin_function (p->name, ftype, p->code,
                              (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
}

static void
alpha_init_builtins (void)
{
  tree ftype;

  alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
  alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
  alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
  alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
  alpha_v4hi_s = build_vector_type (intHI_type_node, 4);

  ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
  alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);

  ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
  alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);

  ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
                                    alpha_dimode_u, NULL_TREE);
  alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);

  if (TARGET_ABI_OPEN_VMS)
    {
      ftype = build_function_type_list (ptr_type_node, ptr_type_node,
                                        NULL_TREE);
      alpha_builtin_function ("__builtin_establish_vms_condition_handler",
                              ftype,
                              ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
                              0);

      ftype = build_function_type_list (ptr_type_node, void_type_node,
                                        NULL_TREE);
      alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
                              ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);

      vms_patch_builtins ();
    }
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.
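
   Expansion looks up the insn in code_for_builtin[], copies any argument
   that fails the operand predicate into a register of the required mode,
   allocates a fresh TARGET when the one supplied does not fit the output
   operand, and emits the generated pattern directly.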
*/ 6559 6560 static rtx 6561 alpha_expand_builtin (tree exp, rtx target, 6562 rtx subtarget ATTRIBUTE_UNUSED, 6563 enum machine_mode mode ATTRIBUTE_UNUSED, 6564 int ignore ATTRIBUTE_UNUSED) 6565 { 6566 #define MAX_ARGS 2 6567 6568 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 6569 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 6570 tree arg; 6571 call_expr_arg_iterator iter; 6572 enum insn_code icode; 6573 rtx op[MAX_ARGS], pat; 6574 int arity; 6575 bool nonvoid; 6576 6577 if (fcode >= ALPHA_BUILTIN_max) 6578 internal_error ("bad builtin fcode"); 6579 icode = code_for_builtin[fcode]; 6580 if (icode == 0) 6581 internal_error ("bad builtin fcode"); 6582 6583 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; 6584 6585 arity = 0; 6586 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) 6587 { 6588 const struct insn_operand_data *insn_op; 6589 6590 if (arg == error_mark_node) 6591 return NULL_RTX; 6592 if (arity > MAX_ARGS) 6593 return NULL_RTX; 6594 6595 insn_op = &insn_data[icode].operand[arity + nonvoid]; 6596 6597 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); 6598 6599 if (!(*insn_op->predicate) (op[arity], insn_op->mode)) 6600 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); 6601 arity++; 6602 } 6603 6604 if (nonvoid) 6605 { 6606 enum machine_mode tmode = insn_data[icode].operand[0].mode; 6607 if (!target 6608 || GET_MODE (target) != tmode 6609 || !(*insn_data[icode].operand[0].predicate) (target, tmode)) 6610 target = gen_reg_rtx (tmode); 6611 } 6612 6613 switch (arity) 6614 { 6615 case 0: 6616 pat = GEN_FCN (icode) (target); 6617 break; 6618 case 1: 6619 if (nonvoid) 6620 pat = GEN_FCN (icode) (target, op[0]); 6621 else 6622 pat = GEN_FCN (icode) (op[0]); 6623 break; 6624 case 2: 6625 pat = GEN_FCN (icode) (target, op[0], op[1]); 6626 break; 6627 default: 6628 gcc_unreachable (); 6629 } 6630 if (!pat) 6631 return NULL_RTX; 6632 emit_insn (pat); 6633 6634 if (nonvoid) 6635 return target; 6636 else 6637 return const0_rtx; 6638 } 6639 6640 6641 /* Several bits below assume HWI >= 64 bits. This should be enforced 6642 by config.gcc. */ 6643 #if HOST_BITS_PER_WIDE_INT < 64 6644 # error "HOST_WIDE_INT too small" 6645 #endif 6646 6647 /* Fold the builtin for the CMPBGE instruction. This is a vector comparison 6648 with an 8-bit output vector. OPINT contains the integer operands; bit N 6649 of OP_CONST is set if OPINT[N] is valid. */ 6650 6651 static tree 6652 alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const) 6653 { 6654 if (op_const == 3) 6655 { 6656 int i, val; 6657 for (i = 0, val = 0; i < 8; ++i) 6658 { 6659 unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff; 6660 unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff; 6661 if (c0 >= c1) 6662 val |= 1 << i; 6663 } 6664 return build_int_cst (alpha_dimode_u, val); 6665 } 6666 else if (op_const == 2 && opint[1] == 0) 6667 return build_int_cst (alpha_dimode_u, 0xff); 6668 return NULL; 6669 } 6670 6671 /* Fold the builtin for the ZAPNOT instruction. This is essentially a 6672 specialized form of an AND operation. Other byte manipulation instructions 6673 are defined in terms of this instruction, so this is also used as a 6674 subroutine for other builtins. 6675 6676 OP contains the tree operands; OPINT contains the extracted integer values. 6677 Bit N of OP_CONST it set if OPINT[N] is valid. OP may be null if only 6678 OPINT may be considered. 
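
   For example, a constant selector of 0x0f keeps the low four bytes, so
   the call reduces to x & 0x00000000ffffffff; a first operand known to be
   zero folds the whole call to zero regardless of the selector.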
*/ 6679 6680 static tree 6681 alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[], 6682 long op_const) 6683 { 6684 if (op_const & 2) 6685 { 6686 unsigned HOST_WIDE_INT mask = 0; 6687 int i; 6688 6689 for (i = 0; i < 8; ++i) 6690 if ((opint[1] >> i) & 1) 6691 mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8); 6692 6693 if (op_const & 1) 6694 return build_int_cst (alpha_dimode_u, opint[0] & mask); 6695 6696 if (op) 6697 return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0], 6698 build_int_cst (alpha_dimode_u, mask)); 6699 } 6700 else if ((op_const & 1) && opint[0] == 0) 6701 return build_int_cst (alpha_dimode_u, 0); 6702 return NULL; 6703 } 6704 6705 /* Fold the builtins for the EXT family of instructions. */ 6706 6707 static tree 6708 alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[], 6709 long op_const, unsigned HOST_WIDE_INT bytemask, 6710 bool is_high) 6711 { 6712 long zap_const = 2; 6713 tree *zap_op = NULL; 6714 6715 if (op_const & 2) 6716 { 6717 unsigned HOST_WIDE_INT loc; 6718 6719 loc = opint[1] & 7; 6720 loc *= BITS_PER_UNIT; 6721 6722 if (loc != 0) 6723 { 6724 if (op_const & 1) 6725 { 6726 unsigned HOST_WIDE_INT temp = opint[0]; 6727 if (is_high) 6728 temp <<= loc; 6729 else 6730 temp >>= loc; 6731 opint[0] = temp; 6732 zap_const = 3; 6733 } 6734 } 6735 else 6736 zap_op = op; 6737 } 6738 6739 opint[1] = bytemask; 6740 return alpha_fold_builtin_zapnot (zap_op, opint, zap_const); 6741 } 6742 6743 /* Fold the builtins for the INS family of instructions. */ 6744 6745 static tree 6746 alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[], 6747 long op_const, unsigned HOST_WIDE_INT bytemask, 6748 bool is_high) 6749 { 6750 if ((op_const & 1) && opint[0] == 0) 6751 return build_int_cst (alpha_dimode_u, 0); 6752 6753 if (op_const & 2) 6754 { 6755 unsigned HOST_WIDE_INT temp, loc, byteloc; 6756 tree *zap_op = NULL; 6757 6758 loc = opint[1] & 7; 6759 bytemask <<= loc; 6760 6761 temp = opint[0]; 6762 if (is_high) 6763 { 6764 byteloc = (64 - (loc * 8)) & 0x3f; 6765 if (byteloc == 0) 6766 zap_op = op; 6767 else 6768 temp >>= byteloc; 6769 bytemask >>= 8; 6770 } 6771 else 6772 { 6773 byteloc = loc * 8; 6774 if (byteloc == 0) 6775 zap_op = op; 6776 else 6777 temp <<= byteloc; 6778 } 6779 6780 opint[0] = temp; 6781 opint[1] = bytemask; 6782 return alpha_fold_builtin_zapnot (zap_op, opint, op_const); 6783 } 6784 6785 return NULL; 6786 } 6787 6788 static tree 6789 alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[], 6790 long op_const, unsigned HOST_WIDE_INT bytemask, 6791 bool is_high) 6792 { 6793 if (op_const & 2) 6794 { 6795 unsigned HOST_WIDE_INT loc; 6796 6797 loc = opint[1] & 7; 6798 bytemask <<= loc; 6799 6800 if (is_high) 6801 bytemask >>= 8; 6802 6803 opint[1] = bytemask ^ 0xff; 6804 } 6805 6806 return alpha_fold_builtin_zapnot (op, opint, op_const); 6807 } 6808 6809 static tree 6810 alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype) 6811 { 6812 tree op0 = fold_convert (vtype, op[0]); 6813 tree op1 = fold_convert (vtype, op[1]); 6814 tree val = fold_build2 (code, vtype, op0, op1); 6815 return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val); 6816 } 6817 6818 static tree 6819 alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const) 6820 { 6821 unsigned HOST_WIDE_INT temp = 0; 6822 int i; 6823 6824 if (op_const != 3) 6825 return NULL; 6826 6827 for (i = 0; i < 8; ++i) 6828 { 6829 unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff; 6830 unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 
0xff; 6831 if (a >= b) 6832 temp += a - b; 6833 else 6834 temp += b - a; 6835 } 6836 6837 return build_int_cst (alpha_dimode_u, temp); 6838 } 6839 6840 static tree 6841 alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const) 6842 { 6843 unsigned HOST_WIDE_INT temp; 6844 6845 if (op_const == 0) 6846 return NULL; 6847 6848 temp = opint[0] & 0xff; 6849 temp |= (opint[0] >> 24) & 0xff00; 6850 6851 return build_int_cst (alpha_dimode_u, temp); 6852 } 6853 6854 static tree 6855 alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const) 6856 { 6857 unsigned HOST_WIDE_INT temp; 6858 6859 if (op_const == 0) 6860 return NULL; 6861 6862 temp = opint[0] & 0xff; 6863 temp |= (opint[0] >> 8) & 0xff00; 6864 temp |= (opint[0] >> 16) & 0xff0000; 6865 temp |= (opint[0] >> 24) & 0xff000000; 6866 6867 return build_int_cst (alpha_dimode_u, temp); 6868 } 6869 6870 static tree 6871 alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const) 6872 { 6873 unsigned HOST_WIDE_INT temp; 6874 6875 if (op_const == 0) 6876 return NULL; 6877 6878 temp = opint[0] & 0xff; 6879 temp |= (opint[0] & 0xff00) << 24; 6880 6881 return build_int_cst (alpha_dimode_u, temp); 6882 } 6883 6884 static tree 6885 alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const) 6886 { 6887 unsigned HOST_WIDE_INT temp; 6888 6889 if (op_const == 0) 6890 return NULL; 6891 6892 temp = opint[0] & 0xff; 6893 temp |= (opint[0] & 0x0000ff00) << 8; 6894 temp |= (opint[0] & 0x00ff0000) << 16; 6895 temp |= (opint[0] & 0xff000000) << 24; 6896 6897 return build_int_cst (alpha_dimode_u, temp); 6898 } 6899 6900 static tree 6901 alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const) 6902 { 6903 unsigned HOST_WIDE_INT temp; 6904 6905 if (op_const == 0) 6906 return NULL; 6907 6908 if (opint[0] == 0) 6909 temp = 64; 6910 else 6911 temp = exact_log2 (opint[0] & -opint[0]); 6912 6913 return build_int_cst (alpha_dimode_u, temp); 6914 } 6915 6916 static tree 6917 alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const) 6918 { 6919 unsigned HOST_WIDE_INT temp; 6920 6921 if (op_const == 0) 6922 return NULL; 6923 6924 if (opint[0] == 0) 6925 temp = 64; 6926 else 6927 temp = 64 - floor_log2 (opint[0]) - 1; 6928 6929 return build_int_cst (alpha_dimode_u, temp); 6930 } 6931 6932 static tree 6933 alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const) 6934 { 6935 unsigned HOST_WIDE_INT temp, op; 6936 6937 if (op_const == 0) 6938 return NULL; 6939 6940 op = opint[0]; 6941 temp = 0; 6942 while (op) 6943 temp++, op &= op - 1; 6944 6945 return build_int_cst (alpha_dimode_u, temp); 6946 } 6947 6948 /* Fold one of our builtin functions. 
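
   OP_CONST has bit N set when argument N is an INTEGER_CST; a fully
   constant call folds to a constant, and the helpers above can still
   simplify some partially constant cases.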
*/ 6949 6950 static tree 6951 alpha_fold_builtin (tree fndecl, int n_args, tree *op, 6952 bool ignore ATTRIBUTE_UNUSED) 6953 { 6954 unsigned HOST_WIDE_INT opint[MAX_ARGS]; 6955 long op_const = 0; 6956 int i; 6957 6958 if (n_args > MAX_ARGS) 6959 return NULL; 6960 6961 for (i = 0; i < n_args; i++) 6962 { 6963 tree arg = op[i]; 6964 if (arg == error_mark_node) 6965 return NULL; 6966 6967 opint[i] = 0; 6968 if (TREE_CODE (arg) == INTEGER_CST) 6969 { 6970 op_const |= 1L << i; 6971 opint[i] = int_cst_value (arg); 6972 } 6973 } 6974 6975 switch (DECL_FUNCTION_CODE (fndecl)) 6976 { 6977 case ALPHA_BUILTIN_CMPBGE: 6978 return alpha_fold_builtin_cmpbge (opint, op_const); 6979 6980 case ALPHA_BUILTIN_EXTBL: 6981 return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false); 6982 case ALPHA_BUILTIN_EXTWL: 6983 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false); 6984 case ALPHA_BUILTIN_EXTLL: 6985 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false); 6986 case ALPHA_BUILTIN_EXTQL: 6987 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false); 6988 case ALPHA_BUILTIN_EXTWH: 6989 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true); 6990 case ALPHA_BUILTIN_EXTLH: 6991 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true); 6992 case ALPHA_BUILTIN_EXTQH: 6993 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true); 6994 6995 case ALPHA_BUILTIN_INSBL: 6996 return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false); 6997 case ALPHA_BUILTIN_INSWL: 6998 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false); 6999 case ALPHA_BUILTIN_INSLL: 7000 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false); 7001 case ALPHA_BUILTIN_INSQL: 7002 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false); 7003 case ALPHA_BUILTIN_INSWH: 7004 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true); 7005 case ALPHA_BUILTIN_INSLH: 7006 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true); 7007 case ALPHA_BUILTIN_INSQH: 7008 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true); 7009 7010 case ALPHA_BUILTIN_MSKBL: 7011 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false); 7012 case ALPHA_BUILTIN_MSKWL: 7013 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false); 7014 case ALPHA_BUILTIN_MSKLL: 7015 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false); 7016 case ALPHA_BUILTIN_MSKQL: 7017 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false); 7018 case ALPHA_BUILTIN_MSKWH: 7019 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true); 7020 case ALPHA_BUILTIN_MSKLH: 7021 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true); 7022 case ALPHA_BUILTIN_MSKQH: 7023 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true); 7024 7025 case ALPHA_BUILTIN_UMULH: 7026 return fold_build2 (MULT_HIGHPART_EXPR, alpha_dimode_u, op[0], op[1]); 7027 7028 case ALPHA_BUILTIN_ZAP: 7029 opint[1] ^= 0xff; 7030 /* FALLTHRU */ 7031 case ALPHA_BUILTIN_ZAPNOT: 7032 return alpha_fold_builtin_zapnot (op, opint, op_const); 7033 7034 case ALPHA_BUILTIN_MINUB8: 7035 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u); 7036 case ALPHA_BUILTIN_MINSB8: 7037 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s); 7038 case ALPHA_BUILTIN_MINUW4: 7039 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u); 7040 case ALPHA_BUILTIN_MINSW4: 7041 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s); 7042 case 
ALPHA_BUILTIN_MAXUB8: 7043 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u); 7044 case ALPHA_BUILTIN_MAXSB8: 7045 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s); 7046 case ALPHA_BUILTIN_MAXUW4: 7047 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u); 7048 case ALPHA_BUILTIN_MAXSW4: 7049 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s); 7050 7051 case ALPHA_BUILTIN_PERR: 7052 return alpha_fold_builtin_perr (opint, op_const); 7053 case ALPHA_BUILTIN_PKLB: 7054 return alpha_fold_builtin_pklb (opint, op_const); 7055 case ALPHA_BUILTIN_PKWB: 7056 return alpha_fold_builtin_pkwb (opint, op_const); 7057 case ALPHA_BUILTIN_UNPKBL: 7058 return alpha_fold_builtin_unpkbl (opint, op_const); 7059 case ALPHA_BUILTIN_UNPKBW: 7060 return alpha_fold_builtin_unpkbw (opint, op_const); 7061 7062 case ALPHA_BUILTIN_CTTZ: 7063 return alpha_fold_builtin_cttz (opint, op_const); 7064 case ALPHA_BUILTIN_CTLZ: 7065 return alpha_fold_builtin_ctlz (opint, op_const); 7066 case ALPHA_BUILTIN_CTPOP: 7067 return alpha_fold_builtin_ctpop (opint, op_const); 7068 7069 case ALPHA_BUILTIN_AMASK: 7070 case ALPHA_BUILTIN_IMPLVER: 7071 case ALPHA_BUILTIN_RPCC: 7072 /* None of these are foldable at compile-time. */ 7073 default: 7074 return NULL; 7075 } 7076 } 7077 7078 /* This page contains routines that are used to determine what the function 7079 prologue and epilogue code will do and write them out. */ 7080 7081 /* Compute the size of the save area in the stack. */ 7082 7083 /* These variables are used for communication between the following functions. 7084 They indicate various things about the current function being compiled 7085 that are used to tell what kind of prologue, epilogue and procedure 7086 descriptor to generate. */ 7087 7088 /* Nonzero if we need a stack procedure. */ 7089 enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2}; 7090 static enum alpha_procedure_types alpha_procedure_type; 7091 7092 /* Register number (either FP or SP) that is used to unwind the frame. */ 7093 static int vms_unwind_regno; 7094 7095 /* Register number used to save FP. We need not have one for RA since 7096 we don't modify it for register procedures. This is only defined 7097 for register frame procedures. */ 7098 static int vms_save_fp_regno; 7099 7100 /* Register number used to reference objects off our PV. */ 7101 static int vms_base_regno; 7102 7103 /* Compute register masks for saved registers. */ 7104 7105 static void 7106 alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP) 7107 { 7108 unsigned long imask = 0; 7109 unsigned long fmask = 0; 7110 unsigned int i; 7111 7112 /* When outputting a thunk, we don't have valid register life info, 7113 but assemble_start_function wants to output .frame and .mask 7114 directives. */ 7115 if (cfun->is_thunk) 7116 { 7117 *imaskP = 0; 7118 *fmaskP = 0; 7119 return; 7120 } 7121 7122 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK) 7123 imask |= (1UL << HARD_FRAME_POINTER_REGNUM); 7124 7125 /* One for every register we have to save. */ 7126 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 7127 if (! fixed_regs[i] && ! call_used_regs[i] 7128 && df_regs_ever_live_p (i) && i != REG_RA) 7129 { 7130 if (i < 32) 7131 imask |= (1UL << i); 7132 else 7133 fmask |= (1UL << (i - 32)); 7134 } 7135 7136 /* We need to restore these for the handler. 
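     These are the EH_RETURN_DATA_REGNO registers; a function that calls
     __builtin_eh_return must save and restore them so the values installed
     for the exception handler are live on return.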
*/ 7137 if (crtl->calls_eh_return) 7138 { 7139 for (i = 0; ; ++i) 7140 { 7141 unsigned regno = EH_RETURN_DATA_REGNO (i); 7142 if (regno == INVALID_REGNUM) 7143 break; 7144 imask |= 1UL << regno; 7145 } 7146 } 7147 7148 /* If any register spilled, then spill the return address also. */ 7149 /* ??? This is required by the Digital stack unwind specification 7150 and isn't needed if we're doing Dwarf2 unwinding. */ 7151 if (imask || fmask || alpha_ra_ever_killed ()) 7152 imask |= (1UL << REG_RA); 7153 7154 *imaskP = imask; 7155 *fmaskP = fmask; 7156 } 7157 7158 int 7159 alpha_sa_size (void) 7160 { 7161 unsigned long mask[2]; 7162 int sa_size = 0; 7163 int i, j; 7164 7165 alpha_sa_mask (&mask[0], &mask[1]); 7166 7167 for (j = 0; j < 2; ++j) 7168 for (i = 0; i < 32; ++i) 7169 if ((mask[j] >> i) & 1) 7170 sa_size++; 7171 7172 if (TARGET_ABI_OPEN_VMS) 7173 { 7174 /* Start with a stack procedure if we make any calls (REG_RA used), or 7175 need a frame pointer, with a register procedure if we otherwise need 7176 at least a slot, and with a null procedure in other cases. */ 7177 if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed) 7178 alpha_procedure_type = PT_STACK; 7179 else if (get_frame_size() != 0) 7180 alpha_procedure_type = PT_REGISTER; 7181 else 7182 alpha_procedure_type = PT_NULL; 7183 7184 /* Don't reserve space for saving FP & RA yet. Do that later after we've 7185 made the final decision on stack procedure vs register procedure. */ 7186 if (alpha_procedure_type == PT_STACK) 7187 sa_size -= 2; 7188 7189 /* Decide whether to refer to objects off our PV via FP or PV. 7190 If we need FP for something else or if we receive a nonlocal 7191 goto (which expects PV to contain the value), we must use PV. 7192 Otherwise, start by assuming we can use FP. */ 7193 7194 vms_base_regno 7195 = (frame_pointer_needed 7196 || cfun->has_nonlocal_label 7197 || alpha_procedure_type == PT_STACK 7198 || crtl->outgoing_args_size) 7199 ? REG_PV : HARD_FRAME_POINTER_REGNUM; 7200 7201 /* If we want to copy PV into FP, we need to find some register 7202 in which to save FP. */ 7203 7204 vms_save_fp_regno = -1; 7205 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM) 7206 for (i = 0; i < 32; i++) 7207 if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i)) 7208 vms_save_fp_regno = i; 7209 7210 /* A VMS condition handler requires a stack procedure in our 7211 implementation. (not required by the calling standard). */ 7212 if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER) 7213 || cfun->machine->uses_condition_handler) 7214 vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK; 7215 else if (alpha_procedure_type == PT_NULL) 7216 vms_base_regno = REG_PV; 7217 7218 /* Stack unwinding should be done via FP unless we use it for PV. */ 7219 vms_unwind_regno = (vms_base_regno == REG_PV 7220 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM); 7221 7222 /* If this is a stack procedure, allow space for saving FP, RA and 7223 a condition handler slot if needed. */ 7224 if (alpha_procedure_type == PT_STACK) 7225 sa_size += 2 + cfun->machine->uses_condition_handler; 7226 } 7227 else 7228 { 7229 /* Our size must be even (multiple of 16 bytes). */ 7230 if (sa_size & 1) 7231 sa_size++; 7232 } 7233 7234 return sa_size * 8; 7235 } 7236 7237 /* Define the offset between two registers, one to be eliminated, 7238 and the other its replacement, at the start of a routine. 
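
   The offset is the register save area plus the rounded outgoing-argument
   block; eliminating the argument pointer additionally adds the rounded
   local frame (including pretend args) minus the pretend-arg bytes that
   the argument pointer already accounts for.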
*/ 7239 7240 HOST_WIDE_INT 7241 alpha_initial_elimination_offset (unsigned int from, 7242 unsigned int to ATTRIBUTE_UNUSED) 7243 { 7244 HOST_WIDE_INT ret; 7245 7246 ret = alpha_sa_size (); 7247 ret += ALPHA_ROUND (crtl->outgoing_args_size); 7248 7249 switch (from) 7250 { 7251 case FRAME_POINTER_REGNUM: 7252 break; 7253 7254 case ARG_POINTER_REGNUM: 7255 ret += (ALPHA_ROUND (get_frame_size () 7256 + crtl->args.pretend_args_size) 7257 - crtl->args.pretend_args_size); 7258 break; 7259 7260 default: 7261 gcc_unreachable (); 7262 } 7263 7264 return ret; 7265 } 7266 7267 #if TARGET_ABI_OPEN_VMS 7268 7269 /* Worker function for TARGET_CAN_ELIMINATE. */ 7270 7271 static bool 7272 alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) 7273 { 7274 /* We need the alpha_procedure_type to decide. Evaluate it now. */ 7275 alpha_sa_size (); 7276 7277 switch (alpha_procedure_type) 7278 { 7279 case PT_NULL: 7280 /* NULL procedures have no frame of their own and we only 7281 know how to resolve from the current stack pointer. */ 7282 return to == STACK_POINTER_REGNUM; 7283 7284 case PT_REGISTER: 7285 case PT_STACK: 7286 /* We always eliminate except to the stack pointer if there is no 7287 usable frame pointer at hand. */ 7288 return (to != STACK_POINTER_REGNUM 7289 || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM); 7290 } 7291 7292 gcc_unreachable (); 7293 } 7294 7295 /* FROM is to be eliminated for TO. Return the offset so that TO+offset 7296 designates the same location as FROM. */ 7297 7298 HOST_WIDE_INT 7299 alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to) 7300 { 7301 /* The only possible attempts we ever expect are ARG or FRAME_PTR to 7302 HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide 7303 on the proper computations and will need the register save area size 7304 in most cases. */ 7305 7306 HOST_WIDE_INT sa_size = alpha_sa_size (); 7307 7308 /* PT_NULL procedures have no frame of their own and we only allow 7309 elimination to the stack pointer. This is the argument pointer and we 7310 resolve the soft frame pointer to that as well. */ 7311 7312 if (alpha_procedure_type == PT_NULL) 7313 return 0; 7314 7315 /* For a PT_STACK procedure the frame layout looks as follows 7316 7317 -----> decreasing addresses 7318 7319 < size rounded up to 16 | likewise > 7320 --------------#------------------------------+++--------------+++-------# 7321 incoming args # pretended args | "frame" | regs sa | PV | outgoing args # 7322 --------------#---------------------------------------------------------# 7323 ^ ^ ^ ^ 7324 ARG_PTR FRAME_PTR HARD_FRAME_PTR STACK_PTR 7325 7326 7327 PT_REGISTER procedures are similar in that they may have a frame of their 7328 own. They have no regs-sa/pv/outgoing-args area. 7329 7330 We first compute offset to HARD_FRAME_PTR, then add what we need to get 7331 to STACK_PTR if need be. */ 7332 7333 { 7334 HOST_WIDE_INT offset; 7335 HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 
8 : 0; 7336 7337 switch (from) 7338 { 7339 case FRAME_POINTER_REGNUM: 7340 offset = ALPHA_ROUND (sa_size + pv_save_size); 7341 break; 7342 case ARG_POINTER_REGNUM: 7343 offset = (ALPHA_ROUND (sa_size + pv_save_size 7344 + get_frame_size () 7345 + crtl->args.pretend_args_size) 7346 - crtl->args.pretend_args_size); 7347 break; 7348 default: 7349 gcc_unreachable (); 7350 } 7351 7352 if (to == STACK_POINTER_REGNUM) 7353 offset += ALPHA_ROUND (crtl->outgoing_args_size); 7354 7355 return offset; 7356 } 7357 } 7358 7359 #define COMMON_OBJECT "common_object" 7360 7361 static tree 7362 common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED, 7363 tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED, 7364 bool *no_add_attrs ATTRIBUTE_UNUSED) 7365 { 7366 tree decl = *node; 7367 gcc_assert (DECL_P (decl)); 7368 7369 DECL_COMMON (decl) = 1; 7370 return NULL_TREE; 7371 } 7372 7373 static const struct attribute_spec vms_attribute_table[] = 7374 { 7375 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, 7376 affects_type_identity } */ 7377 { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler, false }, 7378 { NULL, 0, 0, false, false, false, NULL, false } 7379 }; 7380 7381 void 7382 vms_output_aligned_decl_common(FILE *file, tree decl, const char *name, 7383 unsigned HOST_WIDE_INT size, 7384 unsigned int align) 7385 { 7386 tree attr = DECL_ATTRIBUTES (decl); 7387 fprintf (file, "%s", COMMON_ASM_OP); 7388 assemble_name (file, name); 7389 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size); 7390 /* ??? Unlike on OSF/1, the alignment factor is not in log units. */ 7391 fprintf (file, ",%u", align / BITS_PER_UNIT); 7392 if (attr) 7393 { 7394 attr = lookup_attribute (COMMON_OBJECT, attr); 7395 if (attr) 7396 fprintf (file, ",%s", 7397 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr)))); 7398 } 7399 fputc ('\n', file); 7400 } 7401 7402 #undef COMMON_OBJECT 7403 7404 #endif 7405 7406 static int 7407 find_lo_sum_using_gp (rtx *px, void *data ATTRIBUTE_UNUSED) 7408 { 7409 return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx; 7410 } 7411 7412 int 7413 alpha_find_lo_sum_using_gp (rtx insn) 7414 { 7415 return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0; 7416 } 7417 7418 static int 7419 alpha_does_function_need_gp (void) 7420 { 7421 rtx insn; 7422 7423 /* The GP being variable is an OSF abi thing. */ 7424 if (! TARGET_ABI_OSF) 7425 return 0; 7426 7427 /* We need the gp to load the address of __mcount. */ 7428 if (TARGET_PROFILING_NEEDS_GP && crtl->profile) 7429 return 1; 7430 7431 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */ 7432 if (cfun->is_thunk) 7433 return 1; 7434 7435 /* The nonlocal receiver pattern assumes that the gp is valid for 7436 the nested function. Reasonable because it's almost always set 7437 correctly already. For the cases where that's wrong, make sure 7438 the nested function loads its gp on entry. */ 7439 if (crtl->has_nonlocal_goto) 7440 return 1; 7441 7442 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first. 7443 Even if we are a static function, we still need to do this in case 7444 our address is taken and passed to something like qsort. */ 7445 7446 push_topmost_sequence (); 7447 insn = get_insns (); 7448 pop_topmost_sequence (); 7449 7450 for (; insn; insn = NEXT_INSN (insn)) 7451 if (NONDEBUG_INSN_P (insn) 7452 && ! 
JUMP_TABLE_DATA_P (insn) 7453 && GET_CODE (PATTERN (insn)) != USE 7454 && GET_CODE (PATTERN (insn)) != CLOBBER 7455 && get_attr_usegp (insn)) 7456 return 1; 7457 7458 return 0; 7459 } 7460 7461 7462 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including 7463 sequences. */ 7464 7465 static rtx 7466 set_frame_related_p (void) 7467 { 7468 rtx seq = get_insns (); 7469 rtx insn; 7470 7471 end_sequence (); 7472 7473 if (!seq) 7474 return NULL_RTX; 7475 7476 if (INSN_P (seq)) 7477 { 7478 insn = seq; 7479 while (insn != NULL_RTX) 7480 { 7481 RTX_FRAME_RELATED_P (insn) = 1; 7482 insn = NEXT_INSN (insn); 7483 } 7484 seq = emit_insn (seq); 7485 } 7486 else 7487 { 7488 seq = emit_insn (seq); 7489 RTX_FRAME_RELATED_P (seq) = 1; 7490 } 7491 return seq; 7492 } 7493 7494 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ()) 7495 7496 /* Generates a store with the proper unwind info attached. VALUE is 7497 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG 7498 contains SP+FRAME_BIAS, and that is the unwind info that should be 7499 generated. If FRAME_REG != VALUE, then VALUE is being stored on 7500 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */ 7501 7502 static void 7503 emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias, 7504 HOST_WIDE_INT base_ofs, rtx frame_reg) 7505 { 7506 rtx addr, mem, insn; 7507 7508 addr = plus_constant (Pmode, base_reg, base_ofs); 7509 mem = gen_frame_mem (DImode, addr); 7510 7511 insn = emit_move_insn (mem, value); 7512 RTX_FRAME_RELATED_P (insn) = 1; 7513 7514 if (frame_bias || value != frame_reg) 7515 { 7516 if (frame_bias) 7517 { 7518 addr = plus_constant (Pmode, stack_pointer_rtx, 7519 frame_bias + base_ofs); 7520 mem = gen_rtx_MEM (DImode, addr); 7521 } 7522 7523 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 7524 gen_rtx_SET (VOIDmode, mem, frame_reg)); 7525 } 7526 } 7527 7528 static void 7529 emit_frame_store (unsigned int regno, rtx base_reg, 7530 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs) 7531 { 7532 rtx reg = gen_rtx_REG (DImode, regno); 7533 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg); 7534 } 7535 7536 /* Compute the frame size. SIZE is the size of the "naked" frame 7537 and SA_SIZE is the size of the register save area. */ 7538 7539 static HOST_WIDE_INT 7540 compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size) 7541 { 7542 if (TARGET_ABI_OPEN_VMS) 7543 return ALPHA_ROUND (sa_size 7544 + (alpha_procedure_type == PT_STACK ? 8 : 0) 7545 + size 7546 + crtl->args.pretend_args_size); 7547 else 7548 return ALPHA_ROUND (crtl->outgoing_args_size) 7549 + sa_size 7550 + ALPHA_ROUND (size 7551 + crtl->args.pretend_args_size); 7552 } 7553 7554 /* Write function prologue. */ 7555 7556 /* On vms we have two kinds of functions: 7557 7558 - stack frame (PROC_STACK) 7559 these are 'normal' functions with local vars and which are 7560 calling other functions 7561 - register frame (PROC_REGISTER) 7562 keeps all data in registers, needs no stack 7563 7564 We must pass this to the assembler so it can generate the 7565 proper pdsc (procedure descriptor) 7566 This is done with the '.pdesc' command. 7567 7568 On not-vms, we don't really differentiate between the two, as we can 7569 simply allocate stack without saving registers. */ 7570 7571 void 7572 alpha_expand_prologue (void) 7573 { 7574 /* Registers to save. */ 7575 unsigned long imask = 0; 7576 unsigned long fmask = 0; 7577 /* Stack space needed for pushing registers clobbered by us. 
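     SA_SIZE holds that size; SA_BIAS is the bias applied to the save-area
     base register when the save area is not reachable from SP with a
     signed 16-bit displacement.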
*/ 7578 HOST_WIDE_INT sa_size, sa_bias; 7579 /* Complete stack size needed. */ 7580 HOST_WIDE_INT frame_size; 7581 /* Probed stack size; it additionally includes the size of 7582 the "reserve region" if any. */ 7583 HOST_WIDE_INT probed_size; 7584 /* Offset from base reg to register save area. */ 7585 HOST_WIDE_INT reg_offset; 7586 rtx sa_reg; 7587 int i; 7588 7589 sa_size = alpha_sa_size (); 7590 frame_size = compute_frame_size (get_frame_size (), sa_size); 7591 7592 if (flag_stack_usage_info) 7593 current_function_static_stack_size = frame_size; 7594 7595 if (TARGET_ABI_OPEN_VMS) 7596 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; 7597 else 7598 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); 7599 7600 alpha_sa_mask (&imask, &fmask); 7601 7602 /* Emit an insn to reload GP, if needed. */ 7603 if (TARGET_ABI_OSF) 7604 { 7605 alpha_function_needs_gp = alpha_does_function_need_gp (); 7606 if (alpha_function_needs_gp) 7607 emit_insn (gen_prologue_ldgp ()); 7608 } 7609 7610 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert 7611 the call to mcount ourselves, rather than having the linker do it 7612 magically in response to -pg. Since _mcount has special linkage, 7613 don't represent the call as a call. */ 7614 if (TARGET_PROFILING_NEEDS_GP && crtl->profile) 7615 emit_insn (gen_prologue_mcount ()); 7616 7617 /* Adjust the stack by the frame size. If the frame size is > 4096 7618 bytes, we need to be sure we probe somewhere in the first and last 7619 4096 bytes (we can probably get away without the latter test) and 7620 every 8192 bytes in between. If the frame size is > 32768, we 7621 do this in a loop. Otherwise, we generate the explicit probe 7622 instructions. 7623 7624 Note that we are only allowed to adjust sp once in the prologue. */ 7625 7626 probed_size = frame_size; 7627 if (flag_stack_check) 7628 probed_size += STACK_CHECK_PROTECT; 7629 7630 if (probed_size <= 32768) 7631 { 7632 if (probed_size > 4096) 7633 { 7634 int probed; 7635 7636 for (probed = 4096; probed < probed_size; probed += 8192) 7637 emit_insn (gen_probe_stack (GEN_INT (-probed))); 7638 7639 /* We only have to do this probe if we aren't saving registers or 7640 if we are probing beyond the frame because of -fstack-check. */ 7641 if ((sa_size == 0 && probed_size > probed - 4096) 7642 || flag_stack_check) 7643 emit_insn (gen_probe_stack (GEN_INT (-probed_size))); 7644 } 7645 7646 if (frame_size != 0) 7647 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, 7648 GEN_INT (-frame_size)))); 7649 } 7650 else 7651 { 7652 /* Here we generate code to set R22 to SP + 4096 and set R23 to the 7653 number of 8192 byte blocks to probe. We then probe each block 7654 in the loop and then set SP to the proper location. If the 7655 amount remaining is > 4096, we have to do one more probe if we 7656 are not saving any registers or if we are probing beyond the 7657 frame because of -fstack-check. */ 7658 7659 HOST_WIDE_INT blocks = (probed_size + 4096) / 8192; 7660 HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192; 7661 rtx ptr = gen_rtx_REG (DImode, 22); 7662 rtx count = gen_rtx_REG (DImode, 23); 7663 rtx seq; 7664 7665 emit_move_insn (count, GEN_INT (blocks)); 7666 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096))); 7667 7668 /* Because of the difficulty in emitting a new basic block this 7669 late in the compilation, generate the loop as a single insn. 
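
         For example, probed_size == 100000 gives
         blocks == (100000 + 4096) / 8192 == 12 and leftover == 5792; the
         loop probes those twelve 8192-byte blocks and the code below
         handles the leftover and the final adjustment of SP.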
*/ 7670 emit_insn (gen_prologue_stack_probe_loop (count, ptr)); 7671 7672 if ((leftover > 4096 && sa_size == 0) || flag_stack_check) 7673 { 7674 rtx last = gen_rtx_MEM (DImode, 7675 plus_constant (Pmode, ptr, -leftover)); 7676 MEM_VOLATILE_P (last) = 1; 7677 emit_move_insn (last, const0_rtx); 7678 } 7679 7680 if (flag_stack_check) 7681 { 7682 /* If -fstack-check is specified we have to load the entire 7683 constant into a register and subtract from the sp in one go, 7684 because the probed stack size is not equal to the frame size. */ 7685 HOST_WIDE_INT lo, hi; 7686 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; 7687 hi = frame_size - lo; 7688 7689 emit_move_insn (ptr, GEN_INT (hi)); 7690 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo))); 7691 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, 7692 ptr)); 7693 } 7694 else 7695 { 7696 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr, 7697 GEN_INT (-leftover))); 7698 } 7699 7700 /* This alternative is special, because the DWARF code cannot 7701 possibly intuit through the loop above. So we invent this 7702 note it looks at instead. */ 7703 RTX_FRAME_RELATED_P (seq) = 1; 7704 add_reg_note (seq, REG_FRAME_RELATED_EXPR, 7705 gen_rtx_SET (VOIDmode, stack_pointer_rtx, 7706 plus_constant (Pmode, stack_pointer_rtx, 7707 -frame_size))); 7708 } 7709 7710 /* Cope with very large offsets to the register save area. */ 7711 sa_bias = 0; 7712 sa_reg = stack_pointer_rtx; 7713 if (reg_offset + sa_size > 0x8000) 7714 { 7715 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; 7716 rtx sa_bias_rtx; 7717 7718 if (low + sa_size <= 0x8000) 7719 sa_bias = reg_offset - low, reg_offset = low; 7720 else 7721 sa_bias = reg_offset, reg_offset = 0; 7722 7723 sa_reg = gen_rtx_REG (DImode, 24); 7724 sa_bias_rtx = GEN_INT (sa_bias); 7725 7726 if (add_operand (sa_bias_rtx, DImode)) 7727 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx)); 7728 else 7729 { 7730 emit_move_insn (sa_reg, sa_bias_rtx); 7731 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg)); 7732 } 7733 } 7734 7735 /* Save regs in stack order. Beginning with VMS PV. */ 7736 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK) 7737 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0); 7738 7739 /* Save register RA next. */ 7740 if (imask & (1UL << REG_RA)) 7741 { 7742 emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset); 7743 imask &= ~(1UL << REG_RA); 7744 reg_offset += 8; 7745 } 7746 7747 /* Now save any other registers required to be saved. */ 7748 for (i = 0; i < 31; i++) 7749 if (imask & (1UL << i)) 7750 { 7751 emit_frame_store (i, sa_reg, sa_bias, reg_offset); 7752 reg_offset += 8; 7753 } 7754 7755 for (i = 0; i < 31; i++) 7756 if (fmask & (1UL << i)) 7757 { 7758 emit_frame_store (i+32, sa_reg, sa_bias, reg_offset); 7759 reg_offset += 8; 7760 } 7761 7762 if (TARGET_ABI_OPEN_VMS) 7763 { 7764 /* Register frame procedures save the fp. 
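         The copy goes into the call-used register chosen by alpha_sa_size
         (vms_save_fp_regno), since there is no stack slot for it; the
         REG_CFA_REGISTER note tells the unwinder where FP now lives.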
*/ 7765 if (alpha_procedure_type == PT_REGISTER) 7766 { 7767 rtx insn = emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno), 7768 hard_frame_pointer_rtx); 7769 add_reg_note (insn, REG_CFA_REGISTER, NULL); 7770 RTX_FRAME_RELATED_P (insn) = 1; 7771 } 7772 7773 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV) 7774 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno), 7775 gen_rtx_REG (DImode, REG_PV))); 7776 7777 if (alpha_procedure_type != PT_NULL 7778 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM) 7779 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx)); 7780 7781 /* If we have to allocate space for outgoing args, do it now. */ 7782 if (crtl->outgoing_args_size != 0) 7783 { 7784 rtx seq 7785 = emit_move_insn (stack_pointer_rtx, 7786 plus_constant 7787 (Pmode, hard_frame_pointer_rtx, 7788 - (ALPHA_ROUND 7789 (crtl->outgoing_args_size)))); 7790 7791 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted 7792 if ! frame_pointer_needed. Setting the bit will change the CFA 7793 computation rule to use sp again, which would be wrong if we had 7794 frame_pointer_needed, as this means sp might move unpredictably 7795 later on. 7796 7797 Also, note that 7798 frame_pointer_needed 7799 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM 7800 and 7801 crtl->outgoing_args_size != 0 7802 => alpha_procedure_type != PT_NULL, 7803 7804 so when we are not setting the bit here, we are guaranteed to 7805 have emitted an FRP frame pointer update just before. */ 7806 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed; 7807 } 7808 } 7809 else 7810 { 7811 /* If we need a frame pointer, set it from the stack pointer. */ 7812 if (frame_pointer_needed) 7813 { 7814 if (TARGET_CAN_FAULT_IN_PROLOGUE) 7815 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx)); 7816 else 7817 /* This must always be the last instruction in the 7818 prologue, thus we emit a special move + clobber. */ 7819 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx, 7820 stack_pointer_rtx, sa_reg))); 7821 } 7822 } 7823 7824 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into 7825 the prologue, for exception handling reasons, we cannot do this for 7826 any insn that might fault. We could prevent this for mems with a 7827 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we 7828 have to prevent all such scheduling with a blockage. 7829 7830 Linux, on the other hand, never bothered to implement OSF/1's 7831 exception handling, and so doesn't care about such things. Anyone 7832 planning to use dwarf2 frame-unwind info can also omit the blockage. */ 7833 7834 if (! TARGET_CAN_FAULT_IN_PROLOGUE) 7835 emit_insn (gen_blockage ()); 7836 } 7837 7838 /* Count the number of .file directives, so that .loc is up to date. */ 7839 int num_source_filenames = 0; 7840 7841 /* Output the textual info surrounding the prologue. */ 7842 7843 void 7844 alpha_start_function (FILE *file, const char *fnname, 7845 tree decl ATTRIBUTE_UNUSED) 7846 { 7847 unsigned long imask = 0; 7848 unsigned long fmask = 0; 7849 /* Stack space needed for pushing registers clobbered by us. */ 7850 HOST_WIDE_INT sa_size; 7851 /* Complete stack size needed. */ 7852 unsigned HOST_WIDE_INT frame_size; 7853 /* The maximum debuggable frame size. */ 7854 unsigned HOST_WIDE_INT max_frame_size = 1UL << 31; 7855 /* Offset from base reg to register save area. 
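     On VMS this is 8, or 16 when a condition handler slot is present; on
     OSF it is the rounded outgoing-argument size.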
*/ 7856 HOST_WIDE_INT reg_offset; 7857 char *entry_label = (char *) alloca (strlen (fnname) + 6); 7858 char *tramp_label = (char *) alloca (strlen (fnname) + 6); 7859 int i; 7860 7861 #if TARGET_ABI_OPEN_VMS 7862 vms_start_function (fnname); 7863 #endif 7864 7865 alpha_fnname = fnname; 7866 sa_size = alpha_sa_size (); 7867 frame_size = compute_frame_size (get_frame_size (), sa_size); 7868 7869 if (TARGET_ABI_OPEN_VMS) 7870 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; 7871 else 7872 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); 7873 7874 alpha_sa_mask (&imask, &fmask); 7875 7876 /* Issue function start and label. */ 7877 if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive) 7878 { 7879 fputs ("\t.ent ", file); 7880 assemble_name (file, fnname); 7881 putc ('\n', file); 7882 7883 /* If the function needs GP, we'll write the "..ng" label there. 7884 Otherwise, do it here. */ 7885 if (TARGET_ABI_OSF 7886 && ! alpha_function_needs_gp 7887 && ! cfun->is_thunk) 7888 { 7889 putc ('$', file); 7890 assemble_name (file, fnname); 7891 fputs ("..ng:\n", file); 7892 } 7893 } 7894 /* Nested functions on VMS that are potentially called via trampoline 7895 get a special transfer entry point that loads the called functions 7896 procedure descriptor and static chain. */ 7897 if (TARGET_ABI_OPEN_VMS 7898 && !TREE_PUBLIC (decl) 7899 && DECL_CONTEXT (decl) 7900 && !TYPE_P (DECL_CONTEXT (decl)) 7901 && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL) 7902 { 7903 strcpy (tramp_label, fnname); 7904 strcat (tramp_label, "..tr"); 7905 ASM_OUTPUT_LABEL (file, tramp_label); 7906 fprintf (file, "\tldq $1,24($27)\n"); 7907 fprintf (file, "\tldq $27,16($27)\n"); 7908 } 7909 7910 strcpy (entry_label, fnname); 7911 if (TARGET_ABI_OPEN_VMS) 7912 strcat (entry_label, "..en"); 7913 7914 ASM_OUTPUT_LABEL (file, entry_label); 7915 inside_function = TRUE; 7916 7917 if (TARGET_ABI_OPEN_VMS) 7918 fprintf (file, "\t.base $%d\n", vms_base_regno); 7919 7920 if (TARGET_ABI_OSF 7921 && TARGET_IEEE_CONFORMANT 7922 && !flag_inhibit_size_directive) 7923 { 7924 /* Set flags in procedure descriptor to request IEEE-conformant 7925 math-library routines. The value we set it to is PDSC_EXC_IEEE 7926 (/usr/include/pdsc.h). */ 7927 fputs ("\t.eflag 48\n", file); 7928 } 7929 7930 /* Set up offsets to alpha virtual arg/local debugging pointer. */ 7931 alpha_auto_offset = -frame_size + crtl->args.pretend_args_size; 7932 alpha_arg_offset = -frame_size + 48; 7933 7934 /* Describe our frame. If the frame size is larger than an integer, 7935 print it as zero to avoid an assembler error. We won't be 7936 properly describing such a frame, but that's the best we can do. */ 7937 if (TARGET_ABI_OPEN_VMS) 7938 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26," 7939 HOST_WIDE_INT_PRINT_DEC "\n", 7940 vms_unwind_regno, 7941 frame_size >= (1UL << 31) ? 0 : frame_size, 7942 reg_offset); 7943 else if (!flag_inhibit_size_directive) 7944 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n", 7945 (frame_pointer_needed 7946 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM), 7947 frame_size >= max_frame_size ? 0 : frame_size, 7948 crtl->args.pretend_args_size); 7949 7950 /* Describe which registers were spilled. */ 7951 if (TARGET_ABI_OPEN_VMS) 7952 { 7953 if (imask) 7954 /* ??? Does VMS care if mask contains ra? The old code didn't 7955 set it, so I don't here. 
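	 For example, a function saving $9, $10 and $26 has imask == 0x4000600,
	 so the directive emitted below is ".mask 0x600,0", with the ra bit
	 cleared.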
*/ 7956 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA)); 7957 if (fmask) 7958 fprintf (file, "\t.fmask 0x%lx,0\n", fmask); 7959 if (alpha_procedure_type == PT_REGISTER) 7960 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno); 7961 } 7962 else if (!flag_inhibit_size_directive) 7963 { 7964 if (imask) 7965 { 7966 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask, 7967 frame_size >= max_frame_size ? 0 : reg_offset - frame_size); 7968 7969 for (i = 0; i < 32; ++i) 7970 if (imask & (1UL << i)) 7971 reg_offset += 8; 7972 } 7973 7974 if (fmask) 7975 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask, 7976 frame_size >= max_frame_size ? 0 : reg_offset - frame_size); 7977 } 7978 7979 #if TARGET_ABI_OPEN_VMS 7980 /* If a user condition handler has been installed at some point, emit 7981 the procedure descriptor bits to point the Condition Handling Facility 7982 at the indirection wrapper, and state the fp offset at which the user 7983 handler may be found. */ 7984 if (cfun->machine->uses_condition_handler) 7985 { 7986 fprintf (file, "\t.handler __gcc_shell_handler\n"); 7987 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET); 7988 } 7989 7990 #ifdef TARGET_VMS_CRASH_DEBUG 7991 /* Support of minimal traceback info. */ 7992 switch_to_section (readonly_data_section); 7993 fprintf (file, "\t.align 3\n"); 7994 assemble_name (file, fnname); fputs ("..na:\n", file); 7995 fputs ("\t.ascii \"", file); 7996 assemble_name (file, fnname); 7997 fputs ("\\0\"\n", file); 7998 switch_to_section (text_section); 7999 #endif 8000 #endif /* TARGET_ABI_OPEN_VMS */ 8001 } 8002 8003 /* Emit the .prologue note at the scheduled end of the prologue. */ 8004 8005 static void 8006 alpha_output_function_end_prologue (FILE *file) 8007 { 8008 if (TARGET_ABI_OPEN_VMS) 8009 fputs ("\t.prologue\n", file); 8010 else if (!flag_inhibit_size_directive) 8011 fprintf (file, "\t.prologue %d\n", 8012 alpha_function_needs_gp || cfun->is_thunk); 8013 } 8014 8015 /* Write function epilogue. */ 8016 8017 void 8018 alpha_expand_epilogue (void) 8019 { 8020 /* Registers to save. */ 8021 unsigned long imask = 0; 8022 unsigned long fmask = 0; 8023 /* Stack space needed for pushing registers clobbered by us. */ 8024 HOST_WIDE_INT sa_size; 8025 /* Complete stack size needed. */ 8026 HOST_WIDE_INT frame_size; 8027 /* Offset from base reg to register save area. */ 8028 HOST_WIDE_INT reg_offset; 8029 int fp_is_frame_pointer, fp_offset; 8030 rtx sa_reg, sa_reg_exp = NULL; 8031 rtx sp_adj1, sp_adj2, mem, reg, insn; 8032 rtx eh_ofs; 8033 rtx cfa_restores = NULL_RTX; 8034 int i; 8035 8036 sa_size = alpha_sa_size (); 8037 frame_size = compute_frame_size (get_frame_size (), sa_size); 8038 8039 if (TARGET_ABI_OPEN_VMS) 8040 { 8041 if (alpha_procedure_type == PT_STACK) 8042 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; 8043 else 8044 reg_offset = 0; 8045 } 8046 else 8047 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); 8048 8049 alpha_sa_mask (&imask, &fmask); 8050 8051 fp_is_frame_pointer 8052 = (TARGET_ABI_OPEN_VMS 8053 ? alpha_procedure_type == PT_STACK 8054 : frame_pointer_needed); 8055 fp_offset = 0; 8056 sa_reg = stack_pointer_rtx; 8057 8058 if (crtl->calls_eh_return) 8059 eh_ofs = EH_RETURN_STACKADJ_RTX; 8060 else 8061 eh_ofs = NULL_RTX; 8062 8063 if (sa_size) 8064 { 8065 /* If we have a frame pointer, restore SP from it. */ 8066 if (TARGET_ABI_OPEN_VMS 8067 ? 
vms_unwind_regno == HARD_FRAME_POINTER_REGNUM 8068 : frame_pointer_needed) 8069 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); 8070 8071 /* Cope with very large offsets to the register save area. */ 8072 if (reg_offset + sa_size > 0x8000) 8073 { 8074 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; 8075 HOST_WIDE_INT bias; 8076 8077 if (low + sa_size <= 0x8000) 8078 bias = reg_offset - low, reg_offset = low; 8079 else 8080 bias = reg_offset, reg_offset = 0; 8081 8082 sa_reg = gen_rtx_REG (DImode, 22); 8083 sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias); 8084 8085 emit_move_insn (sa_reg, sa_reg_exp); 8086 } 8087 8088 /* Restore registers in order, excepting a true frame pointer. */ 8089 8090 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, reg_offset)); 8091 reg = gen_rtx_REG (DImode, REG_RA); 8092 emit_move_insn (reg, mem); 8093 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); 8094 8095 reg_offset += 8; 8096 imask &= ~(1UL << REG_RA); 8097 8098 for (i = 0; i < 31; ++i) 8099 if (imask & (1UL << i)) 8100 { 8101 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer) 8102 fp_offset = reg_offset; 8103 else 8104 { 8105 mem = gen_frame_mem (DImode, 8106 plus_constant (Pmode, sa_reg, 8107 reg_offset)); 8108 reg = gen_rtx_REG (DImode, i); 8109 emit_move_insn (reg, mem); 8110 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, 8111 cfa_restores); 8112 } 8113 reg_offset += 8; 8114 } 8115 8116 for (i = 0; i < 31; ++i) 8117 if (fmask & (1UL << i)) 8118 { 8119 mem = gen_frame_mem (DFmode, plus_constant (Pmode, sa_reg, 8120 reg_offset)); 8121 reg = gen_rtx_REG (DFmode, i+32); 8122 emit_move_insn (reg, mem); 8123 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); 8124 reg_offset += 8; 8125 } 8126 } 8127 8128 if (frame_size || eh_ofs) 8129 { 8130 sp_adj1 = stack_pointer_rtx; 8131 8132 if (eh_ofs) 8133 { 8134 sp_adj1 = gen_rtx_REG (DImode, 23); 8135 emit_move_insn (sp_adj1, 8136 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs)); 8137 } 8138 8139 /* If the stack size is large, begin computation into a temporary 8140 register so as not to interfere with a potential fp restore, 8141 which must be consecutive with an SP restore. */ 8142 if (frame_size < 32768 && !cfun->calls_alloca) 8143 sp_adj2 = GEN_INT (frame_size); 8144 else if (frame_size < 0x40007fffL) 8145 { 8146 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; 8147 8148 sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low); 8149 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2)) 8150 sp_adj1 = sa_reg; 8151 else 8152 { 8153 sp_adj1 = gen_rtx_REG (DImode, 23); 8154 emit_move_insn (sp_adj1, sp_adj2); 8155 } 8156 sp_adj2 = GEN_INT (low); 8157 } 8158 else 8159 { 8160 rtx tmp = gen_rtx_REG (DImode, 23); 8161 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false); 8162 if (!sp_adj2) 8163 { 8164 /* We can't drop new things to memory this late, afaik, 8165 so build it up by pieces. */ 8166 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size, 8167 -(frame_size < 0)); 8168 gcc_assert (sp_adj2); 8169 } 8170 } 8171 8172 /* From now on, things must be in order. So emit blockages. */ 8173 8174 /* Restore the frame pointer. 
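	 Its slot was deliberately skipped in the integer-register loop above
	 (only its offset was recorded in fp_offset); it is reloaded here,
	 behind a scheduling blockage, so that the restore stays adjacent to
	 the SP adjustment that follows.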
*/ 8175 if (fp_is_frame_pointer) 8176 { 8177 emit_insn (gen_blockage ()); 8178 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, 8179 fp_offset)); 8180 emit_move_insn (hard_frame_pointer_rtx, mem); 8181 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, 8182 hard_frame_pointer_rtx, cfa_restores); 8183 } 8184 else if (TARGET_ABI_OPEN_VMS) 8185 { 8186 emit_insn (gen_blockage ()); 8187 emit_move_insn (hard_frame_pointer_rtx, 8188 gen_rtx_REG (DImode, vms_save_fp_regno)); 8189 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, 8190 hard_frame_pointer_rtx, cfa_restores); 8191 } 8192 8193 /* Restore the stack pointer. */ 8194 emit_insn (gen_blockage ()); 8195 if (sp_adj2 == const0_rtx) 8196 insn = emit_move_insn (stack_pointer_rtx, sp_adj1); 8197 else 8198 insn = emit_move_insn (stack_pointer_rtx, 8199 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2)); 8200 REG_NOTES (insn) = cfa_restores; 8201 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); 8202 RTX_FRAME_RELATED_P (insn) = 1; 8203 } 8204 else 8205 { 8206 gcc_assert (cfa_restores == NULL); 8207 8208 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER) 8209 { 8210 emit_insn (gen_blockage ()); 8211 insn = emit_move_insn (hard_frame_pointer_rtx, 8212 gen_rtx_REG (DImode, vms_save_fp_regno)); 8213 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); 8214 RTX_FRAME_RELATED_P (insn) = 1; 8215 } 8216 } 8217 } 8218 8219 /* Output the rest of the textual info surrounding the epilogue. */ 8220 8221 void 8222 alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED) 8223 { 8224 rtx insn; 8225 8226 /* We output a nop after noreturn calls at the very end of the function to 8227 ensure that the return address always remains in the caller's code range, 8228 as not doing so might confuse unwinding engines. */ 8229 insn = get_last_insn (); 8230 if (!INSN_P (insn)) 8231 insn = prev_active_insn (insn); 8232 if (insn && CALL_P (insn)) 8233 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL); 8234 8235 #if TARGET_ABI_OPEN_VMS 8236 /* Write the linkage entries. */ 8237 alpha_write_linkage (file, fnname); 8238 #endif 8239 8240 /* End the function. */ 8241 if (TARGET_ABI_OPEN_VMS 8242 || !flag_inhibit_size_directive) 8243 { 8244 fputs ("\t.end ", file); 8245 assemble_name (file, fnname); 8246 putc ('\n', file); 8247 } 8248 inside_function = FALSE; 8249 } 8250 8251 #if TARGET_ABI_OSF 8252 /* Emit a tail call to FUNCTION after adjusting THIS by DELTA. 8253 8254 In order to avoid the hordes of differences between generated code 8255 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating 8256 lots of code loading up large constants, generate rtl and emit it 8257 instead of going straight to text. 8258 8259 Not sure why this idea hasn't been explored before... */ 8260 8261 static void 8262 alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, 8263 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 8264 tree function) 8265 { 8266 HOST_WIDE_INT hi, lo; 8267 rtx this_rtx, insn, funexp; 8268 8269 /* We always require a valid GP. */ 8270 emit_insn (gen_prologue_ldgp ()); 8271 emit_note (NOTE_INSN_PROLOGUE_END); 8272 8273 /* Find the "this" pointer. If the function returns a structure, 8274 the structure return pointer is in $16. */ 8275 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 8276 this_rtx = gen_rtx_REG (Pmode, 17); 8277 else 8278 this_rtx = gen_rtx_REG (Pmode, 16); 8279 8280 /* Add DELTA. When possible we use ldah+lda. 
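     (The constant is split into a sign-extended low 16 bits and the
     remaining high part; e.g. DELTA == 0x12345678 gives lo == 0x5678 and
     hi == 0x12340000, which the add patterns can emit as an ldah followed
     by an lda.)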
Otherwise load the 8281 entire constant for the add. */ 8282 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000; 8283 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; 8284 if (hi + lo == delta) 8285 { 8286 if (hi) 8287 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi))); 8288 if (lo) 8289 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo))); 8290 } 8291 else 8292 { 8293 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), 8294 delta, -(delta < 0)); 8295 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); 8296 } 8297 8298 /* Add a delta stored in the vtable at VCALL_OFFSET. */ 8299 if (vcall_offset) 8300 { 8301 rtx tmp, tmp2; 8302 8303 tmp = gen_rtx_REG (Pmode, 0); 8304 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); 8305 8306 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000; 8307 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; 8308 if (hi + lo == vcall_offset) 8309 { 8310 if (hi) 8311 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi))); 8312 } 8313 else 8314 { 8315 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1), 8316 vcall_offset, -(vcall_offset < 0)); 8317 emit_insn (gen_adddi3 (tmp, tmp, tmp2)); 8318 lo = 0; 8319 } 8320 if (lo) 8321 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo)); 8322 else 8323 tmp2 = tmp; 8324 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2)); 8325 8326 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); 8327 } 8328 8329 /* Generate a tail call to the target function. */ 8330 if (! TREE_USED (function)) 8331 { 8332 assemble_external (function); 8333 TREE_USED (function) = 1; 8334 } 8335 funexp = XEXP (DECL_RTL (function), 0); 8336 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); 8337 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx)); 8338 SIBLING_CALL_P (insn) = 1; 8339 8340 /* Run just enough of rest_of_compilation to get the insns emitted. 8341 There's not really enough bulk here to make other passes such as 8342 instruction scheduling worth while. Note that use_thunk calls 8343 assemble_start_function and assemble_end_function. */ 8344 insn = get_insns (); 8345 shorten_branches (insn); 8346 final_start_function (insn, file, 1); 8347 final (insn, file, 1); 8348 final_end_function (); 8349 } 8350 #endif /* TARGET_ABI_OSF */ 8351 8352 /* Debugging support. */ 8353 8354 #include "gstab.h" 8355 8356 /* Name of the file containing the current function. */ 8357 8358 static const char *current_function_file = ""; 8359 8360 /* Offsets to alpha virtual arg/local debugging pointers. */ 8361 8362 long alpha_arg_offset; 8363 long alpha_auto_offset; 8364 8365 /* Emit a new filename to a stream. */ 8366 8367 void 8368 alpha_output_filename (FILE *stream, const char *name) 8369 { 8370 static int first_time = TRUE; 8371 8372 if (first_time) 8373 { 8374 first_time = FALSE; 8375 ++num_source_filenames; 8376 current_function_file = name; 8377 fprintf (stream, "\t.file\t%d ", num_source_filenames); 8378 output_quoted_string (stream, name); 8379 fprintf (stream, "\n"); 8380 } 8381 8382 else if (name != current_function_file 8383 && strcmp (name, current_function_file) != 0) 8384 { 8385 ++num_source_filenames; 8386 current_function_file = name; 8387 fprintf (stream, "\t.file\t%d ", num_source_filenames); 8388 8389 output_quoted_string (stream, name); 8390 fprintf (stream, "\n"); 8391 } 8392 } 8393 8394 /* Structure to show the current status of registers and memory. 
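   The integer and FP masks are only 31 bits wide because $31 and $f31
   always read as zero and are never tracked (summarize_insn skips regnos
   31 and 63); a single bit stands for all of memory.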
*/ 8395 8396 struct shadow_summary 8397 { 8398 struct { 8399 unsigned int i : 31; /* Mask of int regs */ 8400 unsigned int fp : 31; /* Mask of fp regs */ 8401 unsigned int mem : 1; /* mem == imem | fpmem */ 8402 } used, defd; 8403 }; 8404 8405 /* Summary the effects of expression X on the machine. Update SUM, a pointer 8406 to the summary structure. SET is nonzero if the insn is setting the 8407 object, otherwise zero. */ 8408 8409 static void 8410 summarize_insn (rtx x, struct shadow_summary *sum, int set) 8411 { 8412 const char *format_ptr; 8413 int i, j; 8414 8415 if (x == 0) 8416 return; 8417 8418 switch (GET_CODE (x)) 8419 { 8420 /* ??? Note that this case would be incorrect if the Alpha had a 8421 ZERO_EXTRACT in SET_DEST. */ 8422 case SET: 8423 summarize_insn (SET_SRC (x), sum, 0); 8424 summarize_insn (SET_DEST (x), sum, 1); 8425 break; 8426 8427 case CLOBBER: 8428 summarize_insn (XEXP (x, 0), sum, 1); 8429 break; 8430 8431 case USE: 8432 summarize_insn (XEXP (x, 0), sum, 0); 8433 break; 8434 8435 case ASM_OPERANDS: 8436 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--) 8437 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0); 8438 break; 8439 8440 case PARALLEL: 8441 for (i = XVECLEN (x, 0) - 1; i >= 0; i--) 8442 summarize_insn (XVECEXP (x, 0, i), sum, 0); 8443 break; 8444 8445 case SUBREG: 8446 summarize_insn (SUBREG_REG (x), sum, 0); 8447 break; 8448 8449 case REG: 8450 { 8451 int regno = REGNO (x); 8452 unsigned long mask = ((unsigned long) 1) << (regno % 32); 8453 8454 if (regno == 31 || regno == 63) 8455 break; 8456 8457 if (set) 8458 { 8459 if (regno < 32) 8460 sum->defd.i |= mask; 8461 else 8462 sum->defd.fp |= mask; 8463 } 8464 else 8465 { 8466 if (regno < 32) 8467 sum->used.i |= mask; 8468 else 8469 sum->used.fp |= mask; 8470 } 8471 } 8472 break; 8473 8474 case MEM: 8475 if (set) 8476 sum->defd.mem = 1; 8477 else 8478 sum->used.mem = 1; 8479 8480 /* Find the regs used in memory address computation: */ 8481 summarize_insn (XEXP (x, 0), sum, 0); 8482 break; 8483 8484 case CONST_INT: case CONST_DOUBLE: 8485 case SYMBOL_REF: case LABEL_REF: case CONST: 8486 case SCRATCH: case ASM_INPUT: 8487 break; 8488 8489 /* Handle common unary and binary ops for efficiency. 
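     Their operands are recorded as uses only; anything that actually sets
     a register or memory location was already handled by the SET and
     CLOBBER cases above.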
*/ 8490 case COMPARE: case PLUS: case MINUS: case MULT: case DIV: 8491 case MOD: case UDIV: case UMOD: case AND: case IOR: 8492 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: 8493 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: 8494 case NE: case EQ: case GE: case GT: case LE: 8495 case LT: case GEU: case GTU: case LEU: case LTU: 8496 summarize_insn (XEXP (x, 0), sum, 0); 8497 summarize_insn (XEXP (x, 1), sum, 0); 8498 break; 8499 8500 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: 8501 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: 8502 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: 8503 case SQRT: case FFS: 8504 summarize_insn (XEXP (x, 0), sum, 0); 8505 break; 8506 8507 default: 8508 format_ptr = GET_RTX_FORMAT (GET_CODE (x)); 8509 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 8510 switch (format_ptr[i]) 8511 { 8512 case 'e': 8513 summarize_insn (XEXP (x, i), sum, 0); 8514 break; 8515 8516 case 'E': 8517 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 8518 summarize_insn (XVECEXP (x, i, j), sum, 0); 8519 break; 8520 8521 case 'i': 8522 break; 8523 8524 default: 8525 gcc_unreachable (); 8526 } 8527 } 8528 } 8529 8530 /* Ensure a sufficient number of `trapb' insns are in the code when 8531 the user requests code with a trap precision of functions or 8532 instructions. 8533 8534 In naive mode, when the user requests a trap-precision of 8535 "instruction", a trapb is needed after every instruction that may 8536 generate a trap. This ensures that the code is resumption safe but 8537 it is also slow. 8538 8539 When optimizations are turned on, we delay issuing a trapb as long 8540 as possible. In this context, a trap shadow is the sequence of 8541 instructions that starts with a (potentially) trap generating 8542 instruction and extends to the next trapb or call_pal instruction 8543 (but GCC never generates call_pal by itself). We can delay (and 8544 therefore sometimes omit) a trapb subject to the following 8545 conditions: 8546 8547 (a) On entry to the trap shadow, if any Alpha register or memory 8548 location contains a value that is used as an operand value by some 8549 instruction in the trap shadow (live on entry), then no instruction 8550 in the trap shadow may modify the register or memory location. 8551 8552 (b) Within the trap shadow, the computation of the base register 8553 for a memory load or store instruction may not involve using the 8554 result of an instruction that might generate an UNPREDICTABLE 8555 result. 8556 8557 (c) Within the trap shadow, no register may be used more than once 8558 as a destination register. (This is to make life easier for the 8559 trap-handler.) 8560 8561 (d) The trap shadow may not include any branch instructions. 
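   In the code below, (a) and (c) are checked by intersecting each insn's
   use/def summary with the running shadow masks, and (d) is enforced by
   closing the shadow at any jump, call or label.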
*/ 8562 8563 static void 8564 alpha_handle_trap_shadows (void) 8565 { 8566 struct shadow_summary shadow; 8567 int trap_pending, exception_nesting; 8568 rtx i, n; 8569 8570 trap_pending = 0; 8571 exception_nesting = 0; 8572 shadow.used.i = 0; 8573 shadow.used.fp = 0; 8574 shadow.used.mem = 0; 8575 shadow.defd = shadow.used; 8576 8577 for (i = get_insns (); i ; i = NEXT_INSN (i)) 8578 { 8579 if (NOTE_P (i)) 8580 { 8581 switch (NOTE_KIND (i)) 8582 { 8583 case NOTE_INSN_EH_REGION_BEG: 8584 exception_nesting++; 8585 if (trap_pending) 8586 goto close_shadow; 8587 break; 8588 8589 case NOTE_INSN_EH_REGION_END: 8590 exception_nesting--; 8591 if (trap_pending) 8592 goto close_shadow; 8593 break; 8594 8595 case NOTE_INSN_EPILOGUE_BEG: 8596 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC) 8597 goto close_shadow; 8598 break; 8599 } 8600 } 8601 else if (trap_pending) 8602 { 8603 if (alpha_tp == ALPHA_TP_FUNC) 8604 { 8605 if (JUMP_P (i) 8606 && GET_CODE (PATTERN (i)) == RETURN) 8607 goto close_shadow; 8608 } 8609 else if (alpha_tp == ALPHA_TP_INSN) 8610 { 8611 if (optimize > 0) 8612 { 8613 struct shadow_summary sum; 8614 8615 sum.used.i = 0; 8616 sum.used.fp = 0; 8617 sum.used.mem = 0; 8618 sum.defd = sum.used; 8619 8620 switch (GET_CODE (i)) 8621 { 8622 case INSN: 8623 /* Annoyingly, get_attr_trap will die on these. */ 8624 if (GET_CODE (PATTERN (i)) == USE 8625 || GET_CODE (PATTERN (i)) == CLOBBER) 8626 break; 8627 8628 summarize_insn (PATTERN (i), &sum, 0); 8629 8630 if ((sum.defd.i & shadow.defd.i) 8631 || (sum.defd.fp & shadow.defd.fp)) 8632 { 8633 /* (c) would be violated */ 8634 goto close_shadow; 8635 } 8636 8637 /* Combine shadow with summary of current insn: */ 8638 shadow.used.i |= sum.used.i; 8639 shadow.used.fp |= sum.used.fp; 8640 shadow.used.mem |= sum.used.mem; 8641 shadow.defd.i |= sum.defd.i; 8642 shadow.defd.fp |= sum.defd.fp; 8643 shadow.defd.mem |= sum.defd.mem; 8644 8645 if ((sum.defd.i & shadow.used.i) 8646 || (sum.defd.fp & shadow.used.fp) 8647 || (sum.defd.mem & shadow.used.mem)) 8648 { 8649 /* (a) would be violated (also takes care of (b)) */ 8650 gcc_assert (get_attr_trap (i) != TRAP_YES 8651 || (!(sum.defd.i & sum.used.i) 8652 && !(sum.defd.fp & sum.used.fp))); 8653 8654 goto close_shadow; 8655 } 8656 break; 8657 8658 case BARRIER: 8659 /* __builtin_unreachable can expand to no code at all, 8660 leaving (barrier) RTXes in the instruction stream. */ 8661 goto close_shadow_notrapb; 8662 8663 case JUMP_INSN: 8664 case CALL_INSN: 8665 case CODE_LABEL: 8666 goto close_shadow; 8667 8668 default: 8669 gcc_unreachable (); 8670 } 8671 } 8672 else 8673 { 8674 close_shadow: 8675 n = emit_insn_before (gen_trapb (), i); 8676 PUT_MODE (n, TImode); 8677 PUT_MODE (i, TImode); 8678 close_shadow_notrapb: 8679 trap_pending = 0; 8680 shadow.used.i = 0; 8681 shadow.used.fp = 0; 8682 shadow.used.mem = 0; 8683 shadow.defd = shadow.used; 8684 } 8685 } 8686 } 8687 8688 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC) 8689 && NONJUMP_INSN_P (i) 8690 && GET_CODE (PATTERN (i)) != USE 8691 && GET_CODE (PATTERN (i)) != CLOBBER 8692 && get_attr_trap (i) == TRAP_YES) 8693 { 8694 if (optimize && !trap_pending) 8695 summarize_insn (PATTERN (i), &shadow, 0); 8696 trap_pending = 1; 8697 } 8698 } 8699 } 8700 8701 /* Alpha can only issue instruction groups simultaneously if they are 8702 suitably aligned. This is very processor-specific. */ 8703 /* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe 8704 that are marked "fake". 
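   (TYPE_MVI, TYPE_FSQRT, TYPE_FTOI and TYPE_ITOF for EV4; TYPE_FTOI,
   TYPE_ITOF and TYPE_FSQRT for EV5.)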
These instructions do not exist on that target, 8705 but it is possible to see these insns with deranged combinations of 8706 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting, 8707 choose a result at random. */ 8708 8709 enum alphaev4_pipe { 8710 EV4_STOP = 0, 8711 EV4_IB0 = 1, 8712 EV4_IB1 = 2, 8713 EV4_IBX = 4 8714 }; 8715 8716 enum alphaev5_pipe { 8717 EV5_STOP = 0, 8718 EV5_NONE = 1, 8719 EV5_E01 = 2, 8720 EV5_E0 = 4, 8721 EV5_E1 = 8, 8722 EV5_FAM = 16, 8723 EV5_FA = 32, 8724 EV5_FM = 64 8725 }; 8726 8727 static enum alphaev4_pipe 8728 alphaev4_insn_pipe (rtx insn) 8729 { 8730 if (recog_memoized (insn) < 0) 8731 return EV4_STOP; 8732 if (get_attr_length (insn) != 4) 8733 return EV4_STOP; 8734 8735 switch (get_attr_type (insn)) 8736 { 8737 case TYPE_ILD: 8738 case TYPE_LDSYM: 8739 case TYPE_FLD: 8740 case TYPE_LD_L: 8741 return EV4_IBX; 8742 8743 case TYPE_IADD: 8744 case TYPE_ILOG: 8745 case TYPE_ICMOV: 8746 case TYPE_ICMP: 8747 case TYPE_FST: 8748 case TYPE_SHIFT: 8749 case TYPE_IMUL: 8750 case TYPE_FBR: 8751 case TYPE_MVI: /* fake */ 8752 return EV4_IB0; 8753 8754 case TYPE_IST: 8755 case TYPE_MISC: 8756 case TYPE_IBR: 8757 case TYPE_JSR: 8758 case TYPE_CALLPAL: 8759 case TYPE_FCPYS: 8760 case TYPE_FCMOV: 8761 case TYPE_FADD: 8762 case TYPE_FDIV: 8763 case TYPE_FMUL: 8764 case TYPE_ST_C: 8765 case TYPE_MB: 8766 case TYPE_FSQRT: /* fake */ 8767 case TYPE_FTOI: /* fake */ 8768 case TYPE_ITOF: /* fake */ 8769 return EV4_IB1; 8770 8771 default: 8772 gcc_unreachable (); 8773 } 8774 } 8775 8776 static enum alphaev5_pipe 8777 alphaev5_insn_pipe (rtx insn) 8778 { 8779 if (recog_memoized (insn) < 0) 8780 return EV5_STOP; 8781 if (get_attr_length (insn) != 4) 8782 return EV5_STOP; 8783 8784 switch (get_attr_type (insn)) 8785 { 8786 case TYPE_ILD: 8787 case TYPE_FLD: 8788 case TYPE_LDSYM: 8789 case TYPE_IADD: 8790 case TYPE_ILOG: 8791 case TYPE_ICMOV: 8792 case TYPE_ICMP: 8793 return EV5_E01; 8794 8795 case TYPE_IST: 8796 case TYPE_FST: 8797 case TYPE_SHIFT: 8798 case TYPE_IMUL: 8799 case TYPE_MISC: 8800 case TYPE_MVI: 8801 case TYPE_LD_L: 8802 case TYPE_ST_C: 8803 case TYPE_MB: 8804 case TYPE_FTOI: /* fake */ 8805 case TYPE_ITOF: /* fake */ 8806 return EV5_E0; 8807 8808 case TYPE_IBR: 8809 case TYPE_JSR: 8810 case TYPE_CALLPAL: 8811 return EV5_E1; 8812 8813 case TYPE_FCPYS: 8814 return EV5_FAM; 8815 8816 case TYPE_FBR: 8817 case TYPE_FCMOV: 8818 case TYPE_FADD: 8819 case TYPE_FDIV: 8820 case TYPE_FSQRT: /* fake */ 8821 return EV5_FA; 8822 8823 case TYPE_FMUL: 8824 return EV5_FM; 8825 8826 default: 8827 gcc_unreachable (); 8828 } 8829 } 8830 8831 /* IN_USE is a mask of the slots currently filled within the insn group. 8832 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then 8833 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1. 8834 8835 LEN is, of course, the length of the group in bytes. */ 8836 8837 static rtx 8838 alphaev4_next_group (rtx insn, int *pin_use, int *plen) 8839 { 8840 int len, in_use; 8841 8842 len = in_use = 0; 8843 8844 if (! INSN_P (insn) 8845 || GET_CODE (PATTERN (insn)) == CLOBBER 8846 || GET_CODE (PATTERN (insn)) == USE) 8847 goto next_and_done; 8848 8849 while (1) 8850 { 8851 enum alphaev4_pipe pipe; 8852 8853 pipe = alphaev4_insn_pipe (insn); 8854 switch (pipe) 8855 { 8856 case EV4_STOP: 8857 /* Force complex instructions to start new groups. */ 8858 if (in_use) 8859 goto done; 8860 8861 /* If this is a completely unrecognized insn, it's an asm. 
8862 We don't know how long it is, so record length as -1 to 8863 signal a needed realignment. */ 8864 if (recog_memoized (insn) < 0) 8865 len = -1; 8866 else 8867 len = get_attr_length (insn); 8868 goto next_and_done; 8869 8870 case EV4_IBX: 8871 if (in_use & EV4_IB0) 8872 { 8873 if (in_use & EV4_IB1) 8874 goto done; 8875 in_use |= EV4_IB1; 8876 } 8877 else 8878 in_use |= EV4_IB0 | EV4_IBX; 8879 break; 8880 8881 case EV4_IB0: 8882 if (in_use & EV4_IB0) 8883 { 8884 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1)) 8885 goto done; 8886 in_use |= EV4_IB1; 8887 } 8888 in_use |= EV4_IB0; 8889 break; 8890 8891 case EV4_IB1: 8892 if (in_use & EV4_IB1) 8893 goto done; 8894 in_use |= EV4_IB1; 8895 break; 8896 8897 default: 8898 gcc_unreachable (); 8899 } 8900 len += 4; 8901 8902 /* Haifa doesn't do well scheduling branches. */ 8903 if (JUMP_P (insn)) 8904 goto next_and_done; 8905 8906 next: 8907 insn = next_nonnote_insn (insn); 8908 8909 if (!insn || ! INSN_P (insn)) 8910 goto done; 8911 8912 /* Let Haifa tell us where it thinks insn group boundaries are. */ 8913 if (GET_MODE (insn) == TImode) 8914 goto done; 8915 8916 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE) 8917 goto next; 8918 } 8919 8920 next_and_done: 8921 insn = next_nonnote_insn (insn); 8922 8923 done: 8924 *plen = len; 8925 *pin_use = in_use; 8926 return insn; 8927 } 8928 8929 /* IN_USE is a mask of the slots currently filled within the insn group. 8930 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then 8931 the insn in EV5_E0 can be swapped by the hardware into EV5_E1. 8932 8933 LEN is, of course, the length of the group in bytes. */ 8934 8935 static rtx 8936 alphaev5_next_group (rtx insn, int *pin_use, int *plen) 8937 { 8938 int len, in_use; 8939 8940 len = in_use = 0; 8941 8942 if (! INSN_P (insn) 8943 || GET_CODE (PATTERN (insn)) == CLOBBER 8944 || GET_CODE (PATTERN (insn)) == USE) 8945 goto next_and_done; 8946 8947 while (1) 8948 { 8949 enum alphaev5_pipe pipe; 8950 8951 pipe = alphaev5_insn_pipe (insn); 8952 switch (pipe) 8953 { 8954 case EV5_STOP: 8955 /* Force complex instructions to start new groups. */ 8956 if (in_use) 8957 goto done; 8958 8959 /* If this is a completely unrecognized insn, it's an asm. 8960 We don't know how long it is, so record length as -1 to 8961 signal a needed realignment. */ 8962 if (recog_memoized (insn) < 0) 8963 len = -1; 8964 else 8965 len = get_attr_length (insn); 8966 goto next_and_done; 8967 8968 /* ??? Most of the places below, we would like to assert never 8969 happen, as it would indicate an error either in Haifa, or 8970 in the scheduling description. Unfortunately, Haifa never 8971 schedules the last instruction of the BB, so we don't have 8972 an accurate TI bit to go off. 
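	     So instead of asserting, the conflicting cases below simply
	     terminate the current group with a goto done.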
*/ 8973 case EV5_E01: 8974 if (in_use & EV5_E0) 8975 { 8976 if (in_use & EV5_E1) 8977 goto done; 8978 in_use |= EV5_E1; 8979 } 8980 else 8981 in_use |= EV5_E0 | EV5_E01; 8982 break; 8983 8984 case EV5_E0: 8985 if (in_use & EV5_E0) 8986 { 8987 if (!(in_use & EV5_E01) || (in_use & EV5_E1)) 8988 goto done; 8989 in_use |= EV5_E1; 8990 } 8991 in_use |= EV5_E0; 8992 break; 8993 8994 case EV5_E1: 8995 if (in_use & EV5_E1) 8996 goto done; 8997 in_use |= EV5_E1; 8998 break; 8999 9000 case EV5_FAM: 9001 if (in_use & EV5_FA) 9002 { 9003 if (in_use & EV5_FM) 9004 goto done; 9005 in_use |= EV5_FM; 9006 } 9007 else 9008 in_use |= EV5_FA | EV5_FAM; 9009 break; 9010 9011 case EV5_FA: 9012 if (in_use & EV5_FA) 9013 goto done; 9014 in_use |= EV5_FA; 9015 break; 9016 9017 case EV5_FM: 9018 if (in_use & EV5_FM) 9019 goto done; 9020 in_use |= EV5_FM; 9021 break; 9022 9023 case EV5_NONE: 9024 break; 9025 9026 default: 9027 gcc_unreachable (); 9028 } 9029 len += 4; 9030 9031 /* Haifa doesn't do well scheduling branches. */ 9032 /* ??? If this is predicted not-taken, slotting continues, except 9033 that no more IBR, FBR, or JSR insns may be slotted. */ 9034 if (JUMP_P (insn)) 9035 goto next_and_done; 9036 9037 next: 9038 insn = next_nonnote_insn (insn); 9039 9040 if (!insn || ! INSN_P (insn)) 9041 goto done; 9042 9043 /* Let Haifa tell us where it thinks insn group boundaries are. */ 9044 if (GET_MODE (insn) == TImode) 9045 goto done; 9046 9047 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE) 9048 goto next; 9049 } 9050 9051 next_and_done: 9052 insn = next_nonnote_insn (insn); 9053 9054 done: 9055 *plen = len; 9056 *pin_use = in_use; 9057 return insn; 9058 } 9059 9060 static rtx 9061 alphaev4_next_nop (int *pin_use) 9062 { 9063 int in_use = *pin_use; 9064 rtx nop; 9065 9066 if (!(in_use & EV4_IB0)) 9067 { 9068 in_use |= EV4_IB0; 9069 nop = gen_nop (); 9070 } 9071 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX) 9072 { 9073 in_use |= EV4_IB1; 9074 nop = gen_nop (); 9075 } 9076 else if (TARGET_FP && !(in_use & EV4_IB1)) 9077 { 9078 in_use |= EV4_IB1; 9079 nop = gen_fnop (); 9080 } 9081 else 9082 nop = gen_unop (); 9083 9084 *pin_use = in_use; 9085 return nop; 9086 } 9087 9088 static rtx 9089 alphaev5_next_nop (int *pin_use) 9090 { 9091 int in_use = *pin_use; 9092 rtx nop; 9093 9094 if (!(in_use & EV5_E1)) 9095 { 9096 in_use |= EV5_E1; 9097 nop = gen_nop (); 9098 } 9099 else if (TARGET_FP && !(in_use & EV5_FA)) 9100 { 9101 in_use |= EV5_FA; 9102 nop = gen_fnop (); 9103 } 9104 else if (TARGET_FP && !(in_use & EV5_FM)) 9105 { 9106 in_use |= EV5_FM; 9107 nop = gen_fnop (); 9108 } 9109 else 9110 nop = gen_unop (); 9111 9112 *pin_use = in_use; 9113 return nop; 9114 } 9115 9116 /* The instruction group alignment main loop. */ 9117 9118 static void 9119 alpha_align_insns (unsigned int max_align, 9120 rtx (*next_group) (rtx, int *, int *), 9121 rtx (*next_nop) (int *)) 9122 { 9123 /* ALIGN is the known alignment for the insn group. */ 9124 unsigned int align; 9125 /* OFS is the offset of the current insn in the insn group. */ 9126 int ofs; 9127 int prev_in_use, in_use, len, ldgp; 9128 rtx i, next; 9129 9130 /* Let shorten branches care for assigning alignments to code labels. 
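     This must run first: the label_to_alignment lookups performed when we
     resync at a CODE_LABEL below rely on the alignment data it computes.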
*/ 9131 shorten_branches (get_insns ()); 9132 9133 if (align_functions < 4) 9134 align = 4; 9135 else if ((unsigned int) align_functions < max_align) 9136 align = align_functions; 9137 else 9138 align = max_align; 9139 9140 ofs = prev_in_use = 0; 9141 i = get_insns (); 9142 if (NOTE_P (i)) 9143 i = next_nonnote_insn (i); 9144 9145 ldgp = alpha_function_needs_gp ? 8 : 0; 9146 9147 while (i) 9148 { 9149 next = (*next_group) (i, &in_use, &len); 9150 9151 /* When we see a label, resync alignment etc. */ 9152 if (LABEL_P (i)) 9153 { 9154 unsigned int new_align = 1 << label_to_alignment (i); 9155 9156 if (new_align >= align) 9157 { 9158 align = new_align < max_align ? new_align : max_align; 9159 ofs = 0; 9160 } 9161 9162 else if (ofs & (new_align-1)) 9163 ofs = (ofs | (new_align-1)) + 1; 9164 gcc_assert (!len); 9165 } 9166 9167 /* Handle complex instructions special. */ 9168 else if (in_use == 0) 9169 { 9170 /* Asms will have length < 0. This is a signal that we have 9171 lost alignment knowledge. Assume, however, that the asm 9172 will not mis-align instructions. */ 9173 if (len < 0) 9174 { 9175 ofs = 0; 9176 align = 4; 9177 len = 0; 9178 } 9179 } 9180 9181 /* If the known alignment is smaller than the recognized insn group, 9182 realign the output. */ 9183 else if ((int) align < len) 9184 { 9185 unsigned int new_log_align = len > 8 ? 4 : 3; 9186 rtx prev, where; 9187 9188 where = prev = prev_nonnote_insn (i); 9189 if (!where || !LABEL_P (where)) 9190 where = i; 9191 9192 /* Can't realign between a call and its gp reload. */ 9193 if (! (TARGET_EXPLICIT_RELOCS 9194 && prev && CALL_P (prev))) 9195 { 9196 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where); 9197 align = 1 << new_log_align; 9198 ofs = 0; 9199 } 9200 } 9201 9202 /* We may not insert padding inside the initial ldgp sequence. */ 9203 else if (ldgp > 0) 9204 ldgp -= len; 9205 9206 /* If the group won't fit in the same INT16 as the previous, 9207 we need to add padding to keep the group together. Rather 9208 than simply leaving the insn filling to the assembler, we 9209 can make use of the knowledge of what sorts of instructions 9210 were issued in the previous group to make sure that all of 9211 the added nops are really free. */ 9212 else if (ofs + len > (int) align) 9213 { 9214 int nop_count = (align - ofs) / 4; 9215 rtx where; 9216 9217 /* Insert nops before labels, branches, and calls to truly merge 9218 the execution of the nops with the previous instruction group. */ 9219 where = prev_nonnote_insn (i); 9220 if (where) 9221 { 9222 if (LABEL_P (where)) 9223 { 9224 rtx where2 = prev_nonnote_insn (where); 9225 if (where2 && JUMP_P (where2)) 9226 where = where2; 9227 } 9228 else if (NONJUMP_INSN_P (where)) 9229 where = i; 9230 } 9231 else 9232 where = i; 9233 9234 do 9235 emit_insn_before ((*next_nop)(&prev_in_use), where); 9236 while (--nop_count); 9237 ofs = 0; 9238 } 9239 9240 ofs = (ofs + len) & (align - 1); 9241 prev_in_use = in_use; 9242 i = next; 9243 } 9244 } 9245 9246 /* Insert an unop between sibcall or noreturn function call and GP load. */ 9247 9248 static void 9249 alpha_pad_function_end (void) 9250 { 9251 rtx insn, next; 9252 9253 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 9254 { 9255 if (!CALL_P (insn) 9256 || !(SIBLING_CALL_P (insn) 9257 || find_reg_note (insn, REG_NORETURN, NULL_RTX))) 9258 continue; 9259 9260 /* Make sure we do not split a call and its corresponding 9261 CALL_ARG_LOCATION note. 
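	 If such a note follows the call (possibly after an intervening
	 barrier), INSN is advanced past it so that the unop is emitted after
	 the note rather than between the call and the note.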
*/ 9262 next = NEXT_INSN (insn); 9263 if (next == NULL) 9264 continue; 9265 if (BARRIER_P (next)) 9266 { 9267 next = NEXT_INSN (next); 9268 if (next == NULL) 9269 continue; 9270 } 9271 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION) 9272 insn = next; 9273 9274 next = next_active_insn (insn); 9275 if (next) 9276 { 9277 rtx pat = PATTERN (next); 9278 9279 if (GET_CODE (pat) == SET 9280 && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE 9281 && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1) 9282 emit_insn_after (gen_unop (), insn); 9283 } 9284 } 9285 } 9286 9287 /* Machine dependent reorg pass. */ 9288 9289 static void 9290 alpha_reorg (void) 9291 { 9292 /* Workaround for a linker error that triggers when an exception 9293 handler immediatelly follows a sibcall or a noreturn function. 9294 9295 In the sibcall case: 9296 9297 The instruction stream from an object file: 9298 9299 1d8: 00 00 fb 6b jmp (t12) 9300 1dc: 00 00 ba 27 ldah gp,0(ra) 9301 1e0: 00 00 bd 23 lda gp,0(gp) 9302 1e4: 00 00 7d a7 ldq t12,0(gp) 9303 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec> 9304 9305 was converted in the final link pass to: 9306 9307 12003aa88: 67 fa ff c3 br 120039428 <...> 9308 12003aa8c: 00 00 fe 2f unop 9309 12003aa90: 00 00 fe 2f unop 9310 12003aa94: 48 83 7d a7 ldq t12,-31928(gp) 9311 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec> 9312 9313 And in the noreturn case: 9314 9315 The instruction stream from an object file: 9316 9317 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58> 9318 58: 00 00 ba 27 ldah gp,0(ra) 9319 5c: 00 00 bd 23 lda gp,0(gp) 9320 60: 00 00 7d a7 ldq t12,0(gp) 9321 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68> 9322 9323 was converted in the final link pass to: 9324 9325 fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8> 9326 fdb28: 00 00 fe 2f unop 9327 fdb2c: 00 00 fe 2f unop 9328 fdb30: 30 82 7d a7 ldq t12,-32208(gp) 9329 fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68> 9330 9331 GP load instructions were wrongly cleared by the linker relaxation 9332 pass. This workaround prevents removal of GP loads by inserting 9333 an unop instruction between a sibcall or noreturn function call and 9334 exception handler prologue. */ 9335 9336 if (current_function_has_exception_handlers ()) 9337 alpha_pad_function_end (); 9338 9339 if (alpha_tp != ALPHA_TP_PROG || flag_exceptions) 9340 alpha_handle_trap_shadows (); 9341 9342 /* Due to the number of extra trapb insns, don't bother fixing up 9343 alignment when trap precision is instruction. Moreover, we can 9344 only do our job when sched2 is run. 
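     (The group-boundary detection keys off the TImode marks the scheduler
     leaves on insns, which is why flag_schedule_insns_after_reload is part
     of the test below.)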
*/ 9345 if (optimize && !optimize_size 9346 && alpha_tp != ALPHA_TP_INSN 9347 && flag_schedule_insns_after_reload) 9348 { 9349 if (alpha_tune == PROCESSOR_EV4) 9350 alpha_align_insns (8, alphaev4_next_group, alphaev4_next_nop); 9351 else if (alpha_tune == PROCESSOR_EV5) 9352 alpha_align_insns (16, alphaev5_next_group, alphaev5_next_nop); 9353 } 9354 } 9355 9356 static void 9357 alpha_file_start (void) 9358 { 9359 default_file_start (); 9360 9361 fputs ("\t.set noreorder\n", asm_out_file); 9362 fputs ("\t.set volatile\n", asm_out_file); 9363 if (TARGET_ABI_OSF) 9364 fputs ("\t.set noat\n", asm_out_file); 9365 if (TARGET_EXPLICIT_RELOCS) 9366 fputs ("\t.set nomacro\n", asm_out_file); 9367 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX) 9368 { 9369 const char *arch; 9370 9371 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX) 9372 arch = "ev6"; 9373 else if (TARGET_MAX) 9374 arch = "pca56"; 9375 else if (TARGET_BWX) 9376 arch = "ev56"; 9377 else if (alpha_cpu == PROCESSOR_EV5) 9378 arch = "ev5"; 9379 else 9380 arch = "ev4"; 9381 9382 fprintf (asm_out_file, "\t.arch %s\n", arch); 9383 } 9384 } 9385 9386 /* Since we don't have a .dynbss section, we should not allow global 9387 relocations in the .rodata section. */ 9388 9389 static int 9390 alpha_elf_reloc_rw_mask (void) 9391 { 9392 return flag_pic ? 3 : 2; 9393 } 9394 9395 /* Return a section for X. The only special thing we do here is to 9396 honor small data. */ 9397 9398 static section * 9399 alpha_elf_select_rtx_section (enum machine_mode mode, rtx x, 9400 unsigned HOST_WIDE_INT align) 9401 { 9402 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value) 9403 /* ??? Consider using mergeable sdata sections. */ 9404 return sdata_section; 9405 else 9406 return default_elf_select_rtx_section (mode, x, align); 9407 } 9408 9409 static unsigned int 9410 alpha_elf_section_type_flags (tree decl, const char *name, int reloc) 9411 { 9412 unsigned int flags = 0; 9413 9414 if (strcmp (name, ".sdata") == 0 9415 || strncmp (name, ".sdata.", 7) == 0 9416 || strncmp (name, ".gnu.linkonce.s.", 16) == 0 9417 || strcmp (name, ".sbss") == 0 9418 || strncmp (name, ".sbss.", 6) == 0 9419 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0) 9420 flags = SECTION_SMALL; 9421 9422 flags |= default_section_type_flags (decl, name, reloc); 9423 return flags; 9424 } 9425 9426 /* Structure to collect function names for final output in link section. */ 9427 /* Note that items marked with GTY can't be ifdef'ed out. */ 9428 9429 enum reloc_kind 9430 { 9431 KIND_LINKAGE, 9432 KIND_CODEADDR 9433 }; 9434 9435 struct GTY(()) alpha_links 9436 { 9437 rtx func; 9438 rtx linkage; 9439 enum reloc_kind rkind; 9440 }; 9441 9442 #if TARGET_ABI_OPEN_VMS 9443 9444 /* Return the VMS argument type corresponding to MODE. */ 9445 9446 enum avms_arg_type 9447 alpha_arg_type (enum machine_mode mode) 9448 { 9449 switch (mode) 9450 { 9451 case SFmode: 9452 return TARGET_FLOAT_VAX ? FF : FS; 9453 case DFmode: 9454 return TARGET_FLOAT_VAX ? FD : FT; 9455 default: 9456 return I64; 9457 } 9458 } 9459 9460 /* Return an rtx for an integer representing the VMS Argument Information 9461 register value. 
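   cum.num_args occupies the low bits (the first type field starts at bit
   8), and the type code of argument I is packed into the three bits at
   I*3 + 8, as the loop below shows.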
*/ 9462 9463 rtx 9464 alpha_arg_info_reg_val (CUMULATIVE_ARGS cum) 9465 { 9466 unsigned HOST_WIDE_INT regval = cum.num_args; 9467 int i; 9468 9469 for (i = 0; i < 6; i++) 9470 regval |= ((int) cum.atypes[i]) << (i * 3 + 8); 9471 9472 return GEN_INT (regval); 9473 } 9474 9475 9476 /* Return a SYMBOL_REF representing the reference to the .linkage entry 9477 of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if 9478 this is the reference to the linkage pointer value, 0 if this is the 9479 reference to the function entry value. RFLAG is 1 if this a reduced 9480 reference (code address only), 0 if this is a full reference. */ 9481 9482 rtx 9483 alpha_use_linkage (rtx func, bool lflag, bool rflag) 9484 { 9485 struct alpha_links *al = NULL; 9486 const char *name = XSTR (func, 0); 9487 9488 if (cfun->machine->links) 9489 { 9490 splay_tree_node lnode; 9491 9492 /* Is this name already defined? */ 9493 lnode = splay_tree_lookup (cfun->machine->links, (splay_tree_key) name); 9494 if (lnode) 9495 al = (struct alpha_links *) lnode->value; 9496 } 9497 else 9498 cfun->machine->links = splay_tree_new_ggc 9499 ((splay_tree_compare_fn) strcmp, 9500 ggc_alloc_splay_tree_str_alpha_links_splay_tree_s, 9501 ggc_alloc_splay_tree_str_alpha_links_splay_tree_node_s); 9502 9503 if (al == NULL) 9504 { 9505 size_t buf_len; 9506 char *linksym; 9507 tree id; 9508 9509 if (name[0] == '*') 9510 name++; 9511 9512 /* Follow transparent alias, as this is used for CRTL translations. */ 9513 id = maybe_get_identifier (name); 9514 if (id) 9515 { 9516 while (IDENTIFIER_TRANSPARENT_ALIAS (id)) 9517 id = TREE_CHAIN (id); 9518 name = IDENTIFIER_POINTER (id); 9519 } 9520 9521 buf_len = strlen (name) + 8 + 9; 9522 linksym = (char *) alloca (buf_len); 9523 snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name); 9524 9525 al = ggc_alloc_alpha_links (); 9526 al->func = func; 9527 al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym)); 9528 9529 splay_tree_insert (cfun->machine->links, 9530 (splay_tree_key) ggc_strdup (name), 9531 (splay_tree_value) al); 9532 } 9533 9534 al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE; 9535 9536 if (lflag) 9537 return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8)); 9538 else 9539 return al->linkage; 9540 } 9541 9542 static int 9543 alpha_write_one_linkage (splay_tree_node node, void *data) 9544 { 9545 const char *const name = (const char *) node->key; 9546 struct alpha_links *link = (struct alpha_links *) node->value; 9547 FILE *stream = (FILE *) data; 9548 9549 ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0)); 9550 if (link->rkind == KIND_CODEADDR) 9551 { 9552 /* External and used, request code address. */ 9553 fprintf (stream, "\t.code_address "); 9554 } 9555 else 9556 { 9557 if (!SYMBOL_REF_EXTERNAL_P (link->func) 9558 && SYMBOL_REF_LOCAL_P (link->func)) 9559 { 9560 /* Locally defined, build linkage pair. */ 9561 fprintf (stream, "\t.quad %s..en\n", name); 9562 fprintf (stream, "\t.quad "); 9563 } 9564 else 9565 { 9566 /* External, request linkage pair. 
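	     The output is then a single .linkage line naming the function,
	     rather than the pair of .quad entries emitted above for a locally
	     defined one.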
*/ 9567 fprintf (stream, "\t.linkage "); 9568 } 9569 } 9570 assemble_name (stream, name); 9571 fputs ("\n", stream); 9572 9573 return 0; 9574 } 9575 9576 static void 9577 alpha_write_linkage (FILE *stream, const char *funname) 9578 { 9579 fprintf (stream, "\t.link\n"); 9580 fprintf (stream, "\t.align 3\n"); 9581 in_section = NULL; 9582 9583 #ifdef TARGET_VMS_CRASH_DEBUG 9584 fputs ("\t.name ", stream); 9585 assemble_name (stream, funname); 9586 fputs ("..na\n", stream); 9587 #endif 9588 9589 ASM_OUTPUT_LABEL (stream, funname); 9590 fprintf (stream, "\t.pdesc "); 9591 assemble_name (stream, funname); 9592 fprintf (stream, "..en,%s\n", 9593 alpha_procedure_type == PT_STACK ? "stack" 9594 : alpha_procedure_type == PT_REGISTER ? "reg" : "null"); 9595 9596 if (cfun->machine->links) 9597 { 9598 splay_tree_foreach (cfun->machine->links, alpha_write_one_linkage, stream); 9599 /* splay_tree_delete (func->links); */ 9600 } 9601 } 9602 9603 /* Switch to an arbitrary section NAME with attributes as specified 9604 by FLAGS. ALIGN specifies any known alignment requirements for 9605 the section; 0 if the default should be used. */ 9606 9607 static void 9608 vms_asm_named_section (const char *name, unsigned int flags, 9609 tree decl ATTRIBUTE_UNUSED) 9610 { 9611 fputc ('\n', asm_out_file); 9612 fprintf (asm_out_file, ".section\t%s", name); 9613 9614 if (flags & SECTION_DEBUG) 9615 fprintf (asm_out_file, ",NOWRT"); 9616 9617 fputc ('\n', asm_out_file); 9618 } 9619 9620 /* Record an element in the table of global constructors. SYMBOL is 9621 a SYMBOL_REF of the function to be called; PRIORITY is a number 9622 between 0 and MAX_INIT_PRIORITY. 9623 9624 Differs from default_ctors_section_asm_out_constructor in that the 9625 width of the .ctors entry is always 64 bits, rather than the 32 bits 9626 used by a normal pointer. */ 9627 9628 static void 9629 vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED) 9630 { 9631 switch_to_section (ctors_section); 9632 assemble_align (BITS_PER_WORD); 9633 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1); 9634 } 9635 9636 static void 9637 vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED) 9638 { 9639 switch_to_section (dtors_section); 9640 assemble_align (BITS_PER_WORD); 9641 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1); 9642 } 9643 #else 9644 rtx 9645 alpha_use_linkage (rtx func ATTRIBUTE_UNUSED, 9646 bool lflag ATTRIBUTE_UNUSED, 9647 bool rflag ATTRIBUTE_UNUSED) 9648 { 9649 return NULL_RTX; 9650 } 9651 9652 #endif /* TARGET_ABI_OPEN_VMS */ 9653 9654 static void 9655 alpha_init_libfuncs (void) 9656 { 9657 if (TARGET_ABI_OPEN_VMS) 9658 { 9659 /* Use the VMS runtime library functions for division and 9660 remainder. */ 9661 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I"); 9662 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L"); 9663 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI"); 9664 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL"); 9665 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I"); 9666 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L"); 9667 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI"); 9668 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL"); 9669 abort_libfunc = init_one_libfunc ("decc$abort"); 9670 memcmp_libfunc = init_one_libfunc ("decc$memcmp"); 9671 #ifdef MEM_LIBFUNCS_INIT 9672 MEM_LIBFUNCS_INIT; 9673 #endif 9674 } 9675 } 9676 9677 /* On the Alpha, we use this to disable the floating-point registers 9678 when they don't exist. 
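   That is, when !TARGET_FPREGS, hard registers 32..62 ($f0-$f30) are
   marked both fixed and call-used below; $f31 (regno 63) always reads as
   zero and needs no special handling.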
*/ 9679 9680 static void 9681 alpha_conditional_register_usage (void) 9682 { 9683 int i; 9684 if (! TARGET_FPREGS) 9685 for (i = 32; i < 63; i++) 9686 fixed_regs[i] = call_used_regs[i] = 1; 9687 } 9688 9689 /* Canonicalize a comparison from one we don't have to one we do have. */ 9690 9691 static void 9692 alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1, 9693 bool op0_preserve_value) 9694 { 9695 if (!op0_preserve_value 9696 && (*code == GE || *code == GT || *code == GEU || *code == GTU) 9697 && (REG_P (*op1) || *op1 == const0_rtx)) 9698 { 9699 rtx tem = *op0; 9700 *op0 = *op1; 9701 *op1 = tem; 9702 *code = (int)swap_condition ((enum rtx_code)*code); 9703 } 9704 9705 if ((*code == LT || *code == LTU) 9706 && CONST_INT_P (*op1) && INTVAL (*op1) == 256) 9707 { 9708 *code = *code == LT ? LE : LEU; 9709 *op1 = GEN_INT (255); 9710 } 9711 } 9712 9713 /* Initialize the GCC target structure. */ 9714 #if TARGET_ABI_OPEN_VMS 9715 # undef TARGET_ATTRIBUTE_TABLE 9716 # define TARGET_ATTRIBUTE_TABLE vms_attribute_table 9717 # undef TARGET_CAN_ELIMINATE 9718 # define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate 9719 #endif 9720 9721 #undef TARGET_IN_SMALL_DATA_P 9722 #define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p 9723 9724 #undef TARGET_ASM_ALIGNED_HI_OP 9725 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" 9726 #undef TARGET_ASM_ALIGNED_DI_OP 9727 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" 9728 9729 /* Default unaligned ops are provided for ELF systems. To get unaligned 9730 data for non-ELF systems, we have to turn off auto alignment. */ 9731 #if TARGET_ABI_OPEN_VMS 9732 #undef TARGET_ASM_UNALIGNED_HI_OP 9733 #define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t" 9734 #undef TARGET_ASM_UNALIGNED_SI_OP 9735 #define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t" 9736 #undef TARGET_ASM_UNALIGNED_DI_OP 9737 #define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t" 9738 #endif 9739 9740 #undef TARGET_ASM_RELOC_RW_MASK 9741 #define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask 9742 #undef TARGET_ASM_SELECT_RTX_SECTION 9743 #define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section 9744 #undef TARGET_SECTION_TYPE_FLAGS 9745 #define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags 9746 9747 #undef TARGET_ASM_FUNCTION_END_PROLOGUE 9748 #define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue 9749 9750 #undef TARGET_INIT_LIBFUNCS 9751 #define TARGET_INIT_LIBFUNCS alpha_init_libfuncs 9752 9753 #undef TARGET_LEGITIMIZE_ADDRESS 9754 #define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address 9755 #undef TARGET_MODE_DEPENDENT_ADDRESS_P 9756 #define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p 9757 9758 #undef TARGET_ASM_FILE_START 9759 #define TARGET_ASM_FILE_START alpha_file_start 9760 9761 #undef TARGET_SCHED_ADJUST_COST 9762 #define TARGET_SCHED_ADJUST_COST alpha_adjust_cost 9763 #undef TARGET_SCHED_ISSUE_RATE 9764 #define TARGET_SCHED_ISSUE_RATE alpha_issue_rate 9765 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 9766 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ 9767 alpha_multipass_dfa_lookahead 9768 9769 #undef TARGET_HAVE_TLS 9770 #define TARGET_HAVE_TLS HAVE_AS_TLS 9771 9772 #undef TARGET_BUILTIN_DECL 9773 #define TARGET_BUILTIN_DECL alpha_builtin_decl 9774 #undef TARGET_INIT_BUILTINS 9775 #define TARGET_INIT_BUILTINS alpha_init_builtins 9776 #undef TARGET_EXPAND_BUILTIN 9777 #define TARGET_EXPAND_BUILTIN alpha_expand_builtin 9778 #undef TARGET_FOLD_BUILTIN 9779 #define TARGET_FOLD_BUILTIN alpha_fold_builtin 
9780 9781 #undef TARGET_FUNCTION_OK_FOR_SIBCALL 9782 #define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall 9783 #undef TARGET_CANNOT_COPY_INSN_P 9784 #define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p 9785 #undef TARGET_LEGITIMATE_CONSTANT_P 9786 #define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p 9787 #undef TARGET_CANNOT_FORCE_CONST_MEM 9788 #define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem 9789 9790 #if TARGET_ABI_OSF 9791 #undef TARGET_ASM_OUTPUT_MI_THUNK 9792 #define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf 9793 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 9794 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true 9795 #undef TARGET_STDARG_OPTIMIZE_HOOK 9796 #define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook 9797 #endif 9798 9799 /* Use 16-bits anchor. */ 9800 #undef TARGET_MIN_ANCHOR_OFFSET 9801 #define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1 9802 #undef TARGET_MAX_ANCHOR_OFFSET 9803 #define TARGET_MAX_ANCHOR_OFFSET 0x7fff 9804 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P 9805 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true 9806 9807 #undef TARGET_RTX_COSTS 9808 #define TARGET_RTX_COSTS alpha_rtx_costs 9809 #undef TARGET_ADDRESS_COST 9810 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 9811 9812 #undef TARGET_MACHINE_DEPENDENT_REORG 9813 #define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg 9814 9815 #undef TARGET_PROMOTE_FUNCTION_MODE 9816 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote 9817 #undef TARGET_PROMOTE_PROTOTYPES 9818 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false 9819 #undef TARGET_RETURN_IN_MEMORY 9820 #define TARGET_RETURN_IN_MEMORY alpha_return_in_memory 9821 #undef TARGET_PASS_BY_REFERENCE 9822 #define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference 9823 #undef TARGET_SETUP_INCOMING_VARARGS 9824 #define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs 9825 #undef TARGET_STRICT_ARGUMENT_NAMING 9826 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true 9827 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED 9828 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true 9829 #undef TARGET_SPLIT_COMPLEX_ARG 9830 #define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg 9831 #undef TARGET_GIMPLIFY_VA_ARG_EXPR 9832 #define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg 9833 #undef TARGET_ARG_PARTIAL_BYTES 9834 #define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes 9835 #undef TARGET_FUNCTION_ARG 9836 #define TARGET_FUNCTION_ARG alpha_function_arg 9837 #undef TARGET_FUNCTION_ARG_ADVANCE 9838 #define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance 9839 #undef TARGET_TRAMPOLINE_INIT 9840 #define TARGET_TRAMPOLINE_INIT alpha_trampoline_init 9841 9842 #undef TARGET_INSTANTIATE_DECLS 9843 #define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls 9844 9845 #undef TARGET_SECONDARY_RELOAD 9846 #define TARGET_SECONDARY_RELOAD alpha_secondary_reload 9847 9848 #undef TARGET_SCALAR_MODE_SUPPORTED_P 9849 #define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p 9850 #undef TARGET_VECTOR_MODE_SUPPORTED_P 9851 #define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p 9852 9853 #undef TARGET_BUILD_BUILTIN_VA_LIST 9854 #define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list 9855 9856 #undef TARGET_EXPAND_BUILTIN_VA_START 9857 #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start 9858 9859 /* The Alpha architecture does not require sequential 
consistency. See 9860 http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html 9861 for an example of how it can be violated in practice. */ 9862 #undef TARGET_RELAXED_ORDERING 9863 #define TARGET_RELAXED_ORDERING true 9864 9865 #undef TARGET_OPTION_OVERRIDE 9866 #define TARGET_OPTION_OVERRIDE alpha_option_override 9867 9868 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING 9869 #undef TARGET_MANGLE_TYPE 9870 #define TARGET_MANGLE_TYPE alpha_mangle_type 9871 #endif 9872 9873 #undef TARGET_LEGITIMATE_ADDRESS_P 9874 #define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p 9875 9876 #undef TARGET_CONDITIONAL_REGISTER_USAGE 9877 #define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage 9878 9879 #undef TARGET_CANONICALIZE_COMPARISON 9880 #define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison 9881 9882 struct gcc_target targetm = TARGET_INITIALIZER; 9883 9884 9885 #include "gt-alpha.h" 9886