/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "rtl.h"
#include "insn-attr.h"
#include "tree.h"
#include "regs.h"
#include "df.h"
#include "hard-reg-set.h"
#include "output.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "target-def.h"
#include "targhooks.h"
#include "ggc.h"
#include "function.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "gimple.h"
#include "optabs.h"
#include "dwarf2.h"

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
                                                     const_tree,
                                                     enum machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
                                         int *, unsigned char *, int *, int *);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
                                              HOST_WIDE_INT, HOST_WIDE_INT);

static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                                 const unsigned char *sel);

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = generic;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_rtx_cost_table generic_rtx_cost_table =
{
  NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
  NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
  NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
  NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
  NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_rtx_cost_table,
  &generic_addrcost_table,
  &generic_regmove_cost,
  NAMED_PARAM (memmov_cost, 4)
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
  {NULL, aarch64_none, NULL, 0, NULL}
};
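/* For illustration only (hypothetical entry, not necessarily present in
   aarch64-cores.def): a line such as

     AARCH64_CORE("example-core", examplecore, 8, AARCH64_FL_FPSIMD, generic)

   would expand in all_cores[] to

     {"example-core", examplecore, "8",
      AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},

   i.e. the ARCH argument selects both the architecture string and the
   mandatory feature flags for that architecture, while COSTS selects one
   of the *_tunings structures defined above.  */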
/* Target specification.  These are populated as command-line arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};

/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];

/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
         || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
                                unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
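/* A worked example for the HARD_REGNO_NREGS implementation below
   (illustrative; the figures follow from UNITS_PER_VREG being 16 and
   UNITS_PER_WORD being 8 on AArch64): a 16-byte V4SImode value occupies
   a single FP/SIMD register but two general registers, while an 8-byte
   DImode value occupies one register of either class.  */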
/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}

/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
        return
          (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
        return 1;
    }

  return 0;
}

/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
                                rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */
static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:
        RTL                               Absolute
        tmp = hi (symbol_ref);            adrp  x1, foo
        dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo
                                          nop

        PIC                               TLS
        adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
        ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                          bl   __tls_get_addr
                                          nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
                                   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
        rtx tmp_reg = dest;
        if (can_create_pseudo_p ())
          {
            tmp_reg = gen_reg_rtx (Pmode);
          }

        emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
        emit_insn (gen_add_losym (dest, tmp_reg, imm));
        return;
      }

    case SYMBOL_SMALL_GOT:
      {
        rtx tmp_reg = dest;
        if (can_create_pseudo_p ())
          {
            tmp_reg = gen_reg_rtx (Pmode);
          }
        emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
        emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
        return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
        rtx insns;
        rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

        start_sequence ();
        emit_call_insn (gen_tlsgd_small (result, imm));
        insns = get_insns ();
        end_sequence ();

        RTL_CONST_CALL_P (insns) = 1;
        emit_libcall_block (insns, dest, result, imm);
        return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
        rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
        rtx tp;

        emit_insn (gen_tlsdesc_small (imm));
        tp = aarch64_load_tp (NULL);
        emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
        rtx tmp_reg = gen_reg_rtx (Pmode);
        rtx tp = aarch64_load_tp (NULL);
        emit_insn (gen_tlsie_small (tmp_reg, imm));
        emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_SMALL_TPREL:
      {
        rtx tp = aarch64_load_tp (NULL);
        emit_insn (gen_tlsle_small (dest, tp, imm));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    default:
      gcc_unreachable ();
    }
}
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
          ? emit_move_insn (dest, src)
          : emit_move_insn_1 (dest, src));
}

void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx low_dst;

  gcc_assert (GET_MODE (dst) == TImode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      gcc_assert (GET_MODE (src) == TImode);

      /* Handle r -> w, w -> r.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
        {
          emit_insn (gen_aarch64_movtilow_di (dst,
                                              gen_lowpart (word_mode, src)));
          emit_insn (gen_aarch64_movtihigh_di (dst,
                                               gen_highpart (word_mode, src)));
          return;
        }
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
        {
          emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
                                              src));
          emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
                                               src));
          return;
        }
      /* Fall through to r -> r cases.  */
    }

  low_dst = gen_lowpart (word_mode, dst);
  if (REG_P (low_dst)
      && reg_overlap_mentioned_p (low_dst, src))
    {
      aarch64_emit_move (gen_highpart (word_mode, dst),
                         gen_highpart_mode (word_mode, TImode, src));
      aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
    }
  else
    {
      aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
      aarch64_emit_move (gen_highpart (word_mode, dst),
                         gen_highpart_mode (word_mode, TImode, src));
    }
}

bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
          || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

static rtx
aarch64_force_temporary (rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (Pmode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}


static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
    {
      rtx high;
      /* Load the full offset into a register.  This
         might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (temp, high);
      reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
         before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
        {
        case SYMBOL_FORCE_TO_MEM:
          if (offset != const0_rtx
              && targetm.cannot_force_const_mem (mode, imm))
            {
              gcc_assert (can_create_pseudo_p ());
              base = aarch64_force_temporary (dest, base);
              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
              aarch64_emit_move (dest, base);
              return;
            }
          mem = force_const_mem (mode, imm);
          gcc_assert (mem);
          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
          return;

        case SYMBOL_SMALL_TLSGD:
        case SYMBOL_SMALL_TLSDESC:
        case SYMBOL_SMALL_GOTTPREL:
        case SYMBOL_SMALL_GOT:
          if (offset != const0_rtx)
            {
              gcc_assert (can_create_pseudo_p ());
              base = aarch64_force_temporary (dest, base);
              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
              aarch64_emit_move (dest, base);
              return;
            }
          /* FALLTHRU */

        case SYMBOL_SMALL_TPREL:
        case SYMBOL_SMALL_ABSOLUTE:
          aarch64_load_symref_appropriately (dest, imm, sty);
          return;

        default:
          gcc_unreachable ();
        }
    }

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
        emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
        {
          rtx mem = force_const_mem (mode, imm);
          gcc_assert (mem);
          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
        }

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
         in two; so don't mess around looking for sequences that don't buy
         us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
                                 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
        zero_match++;
      else if ((val & mask) == mask)
        one_match++;
    }

  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
        {
          if ((val & mask) != mask)
            {
              emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
              emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                         GEN_INT ((val >> i) & 0xffff)));
              return;
            }
        }
      gcc_unreachable ();
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

          emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - (val & mask))));
          return;
        }
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                  GEN_INT ((val + comp) & mask)));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - ((val + comp) & mask))));
          return;
        }
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                  GEN_INT ((val - comp) | ~mask)));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - ((val - comp) | ~mask))));
          return;
        }
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                  GEN_INT (val | ~mask)));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - (val | ~mask))));
          return;
        }
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
          || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                  GEN_INT (aarch64_bitmasks[i])));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - aarch64_bitmasks[i])));
          return;
        }

      for (j = 0; j < 64; j += 16, mask <<= 16)
        {
          if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
            {
              emit_insn (gen_rtx_SET (VOIDmode, dest,
                                      GEN_INT (aarch64_bitmasks[i])));
              emit_insn (gen_insv_immdi (dest, GEN_INT (j),
                                         GEN_INT ((val >> j) & 0xffff)));
              return;
            }
        }
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
        {
          int j;

          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
            if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
              {
                subtarget = subtargets ? gen_reg_rtx (mode) : dest;
                emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                        GEN_INT (aarch64_bitmasks[i])));
                emit_insn (gen_iordi3 (dest, subtarget,
                                       GEN_INT (aarch64_bitmasks[j])));
                return;
              }
        }
      else if ((val & aarch64_bitmasks[i]) == val)
        {
          int j;

          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
            if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
              {

                subtarget = subtargets ? gen_reg_rtx (mode) : dest;
                emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                        GEN_INT (aarch64_bitmasks[j])));
                emit_insn (gen_anddi3 (dest, subtarget,
                                       GEN_INT (aarch64_bitmasks[i])));
                return;
              }
        }
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
        {
          if (first)
            {
              emit_insn (gen_rtx_SET (VOIDmode, dest,
                                      GEN_INT (val & mask)));
              first = false;
            }
          else
            emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                       GEN_INT ((val >> i) & 0xffff)));
        }
    }
}

static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
                           enum machine_mode mode,
                           const_tree type,
                           bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  if (type)
    {
      /* Arrays are always passed by reference.  */
      if (TREE_CODE (type) == ARRAY_TYPE)
        return true;
      /* Other aggregates based on their size.  */
      if (AGGREGATE_TYPE_P (type))
        size = int_size_in_bytes (type);
    }

  /* Variable sized arguments are always passed by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
                                               &dummymode, &nregs,
                                               NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating-point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}

/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  enum machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
                                               &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}
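/* Worked example for the big-endian handling below (illustrative): a
   12-byte non-HFA structure returned on a big-endian target satisfies
   aarch64_return_in_msb, so aarch64_function_value widens its size to
   16 bytes and returns it in an integer container (X0/X1) with the data
   in the most significant bytes and the padding in the least
   significant ones.  */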
/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
                        bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
                                               &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
        {
          gcc_assert (count == 1 && mode == ag_mode);
          return gen_rtx_REG (mode, V0_REGNUM);
        }
      else
        {
          int i;
          rtx par;

          par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
          for (i = 0; i < count; i++)
            {
              rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
              tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
              XVECEXP (par, 0, i) = tmp;
            }
          return par;
        }
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}

/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of a called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}

/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types are always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
                                               type,
                                               &ag_mode,
                                               &count,
                                               NULL))
    return false;

  /* Types larger than 2 registers are returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}

static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
                               const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
                                                  type,
                                                  &pcum->aapcs_vfp_rmode,
                                                  nregs,
                                                  NULL);
}

/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
        {
          if (TYPE_MODE (type) == mode)
            alignment = TYPE_ALIGN (type);
          else
            alignment = GET_MODE_ALIGNMENT (mode);
        }
      else
        alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}

/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
                    const_tree type,
                    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;
  HOST_WIDE_INT size;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
  size
    = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
                        UNITS_PER_WORD);

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
                                                 mode,
                                                 type,
                                                 &nregs);

  /* allocate_ncrn may be a false positive, but allocate_nvrn is quite
     reliable.  The following code thus handles passing by SIMD/FP
     registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
        {
          pcum->aapcs_nextnvrn = nvrn + nregs;
          if (!aarch64_composite_type_p (type, mode))
            {
              gcc_assert (nregs == 1);
              pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
            }
          else
            {
              rtx par;
              int i;
              par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
              for (i = 0; i < nregs; i++)
                {
                  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
                                         V0_REGNUM + nvrn + i);
                  tmp = gen_rtx_EXPR_LIST
                    (VOIDmode, tmp,
                     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
                  XVECEXP (par, 0, i) = tmp;
                }
              pcum->aapcs_reg = par;
            }
          return;
        }
      else
        {
          /* C.3 NSRN is set to 8.  */
          pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
          goto on_stack;
        }
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = size / UNITS_PER_WORD;

  /* C6 - C9 (though the sign and zero extension semantics are handled
     elsewhere).
     This is the case where the argument fits entirely in general
     registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
         rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
        {
          ++ncrn;
          gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
        }
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
         A reg is still generated for it, but the caller should be smart
         enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
        {
          pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
        }
      else
        {
          rtx par;
          int i;

          par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
          for (i = 0; i < nregs; i++)
            {
              rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
              tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                       GEN_INT (i * UNITS_PER_WORD));
              XVECEXP (par, 0, i) = tmp;
            }
          pcum->aapcs_reg = par;
        }

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument and align the total size if necessary.  */
on_stack:
  pcum->aapcs_stack_words = size / UNITS_PER_WORD;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
                                               16 / UNITS_PER_WORD);
  return;
}
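/* Illustrative example of the layout rules above (the prototype is
   hypothetical, not taken from the AAPCS64 text): for

     void f (double d, struct { int a[3]; } s, int i);

   D is allocated to the first SIMD/FP argument register V0, S is a
   12-byte non-HFA aggregate whose size is rounded up to 16 bytes and
   therefore takes the general register pair X0/X1, and I then goes in
   X2.  The NSRN/NGRN counters in PCUM carry this allocation state from
   one argument to the next.  */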
/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
                      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}

void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
                              const_tree fntype ATTRIBUTE_UNUSED,
                              rtx libname ATTRIBUTE_UNUSED,
                              const_tree fndecl ATTRIBUTE_UNUSED,
                              unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}

static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
                              enum machine_mode mode,
                              const_tree type,
                              bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
                  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}

bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
          || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}

/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}

/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral types and floating point types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}

/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its most significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
                        bool first ATTRIBUTE_UNUSED)
{

  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
                            : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
        return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}

static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}

static bool
aarch64_frame_pointer_required (void)
{
  /* If the function contains dynamic stack allocations, we need to
     use the frame pointer to access the static parts of the frame.  */
  if (cfun->calls_alloca)
    return true;

  /* We may have turned flag_omit_frame_pointer on in order to have this
     function called; if we did, we also set the 'faked_omit_frame_pointer'
     flag and we'll check it here.
     If we really did set flag_omit_frame_pointer normally, then we return
     false (no frame pointer required) in all cases.  */

  if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
    return false;
  else if (flag_omit_leaf_frame_pointer)
    return !crtl->is_leaf;
  return true;
}

/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  cfun->machine->frame.fp_lr_offset = 0;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
        && !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
        && !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
        cfun->machine->frame.reg_offset[regno] = offset;
        offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
        cfun->machine->frame.reg_offset[regno] = offset;
        offset += UNITS_PER_WORD;
      }

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
    }

  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
    }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;
  cfun->machine->frame.laid_out = true;
}

/* Make the last instruction frame-related and note that it performs
   the operation described by FRAME_PATTERN.  */

static void
aarch64_set_frame_expr (rtx frame_pattern)
{
  rtx insn;

  insn = get_last_insn ();
  RTX_FRAME_RELATED_P (insn) = 1;
  RTX_FRAME_RELATED_P (frame_pattern) = 1;
  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                      frame_pattern,
                                      REG_NOTES (insn));
}

static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] != -1;
}


static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
                              bool restore, rtx base_rtx)
{
  unsigned regno;
  unsigned regno2;
  rtx insn;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed) ? gen_frame_mem : gen_rtx_MEM;


  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
        {
          rtx mem;
          mem = gen_mem_ref (DFmode,
                             plus_constant (Pmode,
                                            base_rtx,
                                            start_offset));

          for (regno2 = regno + 1;
               regno2 <= V31_REGNUM
               && !aarch64_register_saved_on_entry (regno2);
               regno2++)
            {
              /* Empty loop.  */
            }
          if (regno2 <= V31_REGNUM
              && aarch64_register_saved_on_entry (regno2))
            {
              rtx mem2;
              /* Next highest register to be saved.  */
              mem2 = gen_mem_ref (DFmode,
                                  plus_constant
                                  (Pmode,
                                   base_rtx,
                                   start_offset + increment));
              if (restore == false)
                {
                  insn = emit_insn
                    ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
                                        mem2, gen_rtx_REG (DFmode, regno2)));

                }
              else
                {
                  insn = emit_insn
                    ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
                                       gen_rtx_REG (DFmode, regno2), mem2));

                  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
                  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
                }

              /* The first part of a frame-related parallel insn
                 is always assumed to be relevant to the frame
                 calculations; subsequent parts are only
                 frame-related if explicitly marked.  */
              RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
              regno = regno2;
              start_offset += increment * 2;
            }
          else
            {
              if (restore == false)
                insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
              else
                {
                  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
                  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
                }
              start_offset += increment;
            }
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

}


/* Offset from the stack pointer of where the saves and
   restores have to happen.  */
static void
aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
                                               bool restore)
{
  rtx insn;
  rtx base_rtx = stack_pointer_rtx;
  HOST_WIDE_INT start_offset = offset;
  HOST_WIDE_INT increment = UNITS_PER_WORD;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed) ? gen_frame_mem : gen_rtx_MEM;
  unsigned limit = (frame_pointer_needed) ? R28_REGNUM : R30_REGNUM;
  unsigned regno;
  unsigned regno2;

  for (regno = R0_REGNUM; regno <= limit; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
        {
          rtx mem;
          mem = gen_mem_ref (Pmode,
                             plus_constant (Pmode,
                                            base_rtx,
                                            start_offset));

          for (regno2 = regno + 1;
               regno2 <= limit
               && !aarch64_register_saved_on_entry (regno2);
               regno2++)
            {
              /* Empty loop.  */
            }
          if (regno2 <= limit
              && aarch64_register_saved_on_entry (regno2))
            {
              rtx mem2;
              /* Next highest register to be saved.  */
              mem2 = gen_mem_ref (Pmode,
                                  plus_constant
                                  (Pmode,
                                   base_rtx,
                                   start_offset + increment));
              if (restore == false)
                {
                  insn = emit_insn
                    ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
                                        mem2, gen_rtx_REG (DImode, regno2)));

                }
              else
                {
                  insn = emit_insn
                    ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
                                       gen_rtx_REG (DImode, regno2), mem2));

                  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
                  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
                }

              /* The first part of a frame-related parallel insn
                 is always assumed to be relevant to the frame
                 calculations; subsequent parts are only
                 frame-related if explicitly marked.  */
              RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
              regno = regno2;
              start_offset += increment * 2;
            }
          else
            {
              if (restore == false)
                insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
              else
                {
                  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
                  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
                }
              start_offset += increment;
            }
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);

}

/* AArch64 stack frames generated by this compiler look like:

        +-------------------------------+
        |                               |
        |  incoming stack arguments     |
        |                               |
        +-------------------------------+ <-- arg_pointer_rtx
        |                               |
        |  callee-allocated save area   |
        |  for register varargs         |
        |                               |
        +-------------------------------+
        |                               |
        |  local variables              |
        |                               |
        +-------------------------------+ <-- frame_pointer_rtx
        |                               |
        |  callee-saved registers       |
        |                               |
        +-------------------------------+
        |  LR'                          |
        +-------------------------------+
        |  FP'                          |
      P +-------------------------------+ <-- hard_frame_pointer_rtx
        |  dynamic allocation           |
        +-------------------------------+
        |                               |
        |  outgoing stack arguments     |
        |                               |
        +-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */

/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>
  */
  HOST_WIDE_INT original_frame_size;	/* local variables + vararg save */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* FP offset from SP */
  rtx insn;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
              && (cfun->stdarg || !cfun->machine->saved_varargs_size));
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
                + crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
                                          STACK_BOUNDARY / BITS_PER_UNIT);

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  fp_offset = (offset
               - original_frame_size
               - cfun->machine->frame.saved_regs_size);

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
         the stack pointer to jump over the callee-allocated save area for
         register varargs, the local variable area and/or the callee-saved
         register area.  This will allow the pre-index write-back
         store pair instructions to be used for setting up the stack frame
         efficiently.  */
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
        offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
        {
          rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
          emit_move_insn (op0, GEN_INT (-frame_size));
          emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
          aarch64_set_frame_expr (gen_rtx_SET
                                  (Pmode, stack_pointer_rtx,
                                   gen_rtx_PLUS (Pmode,
                                                 stack_pointer_rtx,
                                                 GEN_INT (-frame_size))));
        }
      else if (frame_size > 0)
        {
          if ((frame_size & 0xfff) != frame_size)
            {
              insn = emit_insn (gen_add2_insn
                                (stack_pointer_rtx,
                                 GEN_INT (-(frame_size
                                            & ~(HOST_WIDE_INT)0xfff))));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
          if ((frame_size & 0xfff) != 0)
            {
              insn = emit_insn (gen_add2_insn
                                (stack_pointer_rtx,
                                 GEN_INT (-(frame_size
                                            & (HOST_WIDE_INT)0xfff))));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
    }
  else
    frame_size = -1;

  if (offset > 0)
    {
      /* Save the frame pointer and lr if the frame pointer is needed
         first.  Make the frame pointer point to the location of the
         old frame pointer on the stack.  */
      if (frame_pointer_needed)
        {
          rtx mem_fp, mem_lr;

          if (fp_offset)
            {
              insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
                                               GEN_INT (-offset)));
              RTX_FRAME_RELATED_P (insn) = 1;
              aarch64_set_frame_expr (gen_rtx_SET
                                      (Pmode, stack_pointer_rtx,
                                       gen_rtx_MINUS (Pmode,
                                                      stack_pointer_rtx,
                                                      GEN_INT (offset))));
              mem_fp = gen_frame_mem (DImode,
                                      plus_constant (Pmode,
                                                     stack_pointer_rtx,
                                                     fp_offset));
              mem_lr = gen_frame_mem (DImode,
                                      plus_constant (Pmode,
                                                     stack_pointer_rtx,
                                                     fp_offset
                                                     + UNITS_PER_WORD));
              insn = emit_insn (gen_store_pairdi (mem_fp,
                                                  hard_frame_pointer_rtx,
                                                  mem_lr,
                                                  gen_rtx_REG (DImode,
                                                               LR_REGNUM)));
            }
          else
            {
              insn = emit_insn (gen_storewb_pairdi_di
                                (stack_pointer_rtx, stack_pointer_rtx,
                                 hard_frame_pointer_rtx,
                                 gen_rtx_REG (DImode, LR_REGNUM),
                                 GEN_INT (-offset),
                                 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
              RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
            }

          /* The first part of a frame-related parallel insn is always
             assumed to be relevant to the frame calculations;
             subsequent parts are only frame-related if explicitly
             marked.  */
          RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
          RTX_FRAME_RELATED_P (insn) = 1;

          /* Set up frame pointer to point to the location of the
             previous frame pointer on the stack.  */
*/ 1953 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, 1954 stack_pointer_rtx, 1955 GEN_INT (fp_offset))); 1956 aarch64_set_frame_expr (gen_rtx_SET 1957 (Pmode, hard_frame_pointer_rtx, 1958 gen_rtx_PLUS (Pmode, 1959 stack_pointer_rtx, 1960 GEN_INT (fp_offset)))); 1961 RTX_FRAME_RELATED_P (insn) = 1; 1962 insn = emit_insn (gen_stack_tie (stack_pointer_rtx, 1963 hard_frame_pointer_rtx)); 1964 } 1965 else 1966 { 1967 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, 1968 GEN_INT (-offset))); 1969 RTX_FRAME_RELATED_P (insn) = 1; 1970 } 1971 1972 aarch64_save_or_restore_callee_save_registers 1973 (fp_offset + cfun->machine->frame.hardfp_offset, 0); 1974 } 1975 1976 /* when offset >= 512, 1977 sub sp, sp, #<outgoing_args_size> */ 1978 if (frame_size > -1) 1979 { 1980 if (crtl->outgoing_args_size > 0) 1981 { 1982 insn = emit_insn (gen_add2_insn 1983 (stack_pointer_rtx, 1984 GEN_INT (- crtl->outgoing_args_size))); 1985 RTX_FRAME_RELATED_P (insn) = 1; 1986 } 1987 } 1988 } 1989 1990 /* Generate the epilogue instructions for returning from a function. */ 1991 void 1992 aarch64_expand_epilogue (bool for_sibcall) 1993 { 1994 HOST_WIDE_INT original_frame_size, frame_size, offset; 1995 HOST_WIDE_INT fp_offset; 1996 rtx insn; 1997 rtx cfa_reg; 1998 1999 aarch64_layout_frame (); 2000 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; 2001 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size 2002 + crtl->outgoing_args_size); 2003 offset = frame_size = AARCH64_ROUND_UP (frame_size, 2004 STACK_BOUNDARY / BITS_PER_UNIT); 2005 2006 fp_offset = (offset 2007 - original_frame_size 2008 - cfun->machine->frame.saved_regs_size); 2009 2010 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx; 2011 2012 /* Store pairs and load pairs have a range only -512 to 504. */ 2013 if (offset >= 512) 2014 { 2015 offset = original_frame_size + cfun->machine->frame.saved_regs_size; 2016 if (offset >= 512) 2017 offset = cfun->machine->frame.saved_regs_size; 2018 2019 frame_size -= (offset + crtl->outgoing_args_size); 2020 fp_offset = 0; 2021 if (!frame_pointer_needed && crtl->outgoing_args_size > 0) 2022 { 2023 insn = emit_insn (gen_add2_insn 2024 (stack_pointer_rtx, 2025 GEN_INT (crtl->outgoing_args_size))); 2026 RTX_FRAME_RELATED_P (insn) = 1; 2027 } 2028 } 2029 else 2030 frame_size = -1; 2031 2032 /* If there were outgoing arguments or we've done dynamic stack 2033 allocation, then restore the stack pointer from the frame 2034 pointer. This is at most one insn and more efficient than using 2035 GCC's internal mechanism. */ 2036 if (frame_pointer_needed 2037 && (crtl->outgoing_args_size || cfun->calls_alloca)) 2038 { 2039 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, 2040 hard_frame_pointer_rtx, 2041 GEN_INT (- fp_offset))); 2042 RTX_FRAME_RELATED_P (insn) = 1; 2043 /* As SP is set to (FP - fp_offset), according to the rules in 2044 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated 2045 from the value of SP from now on. */ 2046 cfa_reg = stack_pointer_rtx; 2047 } 2048 2049 aarch64_save_or_restore_callee_save_registers 2050 (fp_offset + cfun->machine->frame.hardfp_offset, 1); 2051 2052 /* Restore the frame pointer and lr if the frame pointer is needed. 
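     Typically this is either "ldp x29, x30, [sp, #<fp_offset>]" or, when
     fp_offset is zero, the post-index form "ldp x29, x30, [sp], #<offset>"
     (offsets illustrative); the CFA notes added below keep the unwinder
     in step with either form.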
*/ 2053 if (offset > 0) 2054 { 2055 if (frame_pointer_needed) 2056 { 2057 rtx mem_fp, mem_lr; 2058 2059 if (fp_offset) 2060 { 2061 mem_fp = gen_frame_mem (DImode, 2062 plus_constant (Pmode, 2063 stack_pointer_rtx, 2064 fp_offset)); 2065 mem_lr = gen_frame_mem (DImode, 2066 plus_constant (Pmode, 2067 stack_pointer_rtx, 2068 fp_offset 2069 + UNITS_PER_WORD)); 2070 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx, 2071 mem_fp, 2072 gen_rtx_REG (DImode, 2073 LR_REGNUM), 2074 mem_lr)); 2075 } 2076 else 2077 { 2078 insn = emit_insn (gen_loadwb_pairdi_di 2079 (stack_pointer_rtx, 2080 stack_pointer_rtx, 2081 hard_frame_pointer_rtx, 2082 gen_rtx_REG (DImode, LR_REGNUM), 2083 GEN_INT (offset), 2084 GEN_INT (GET_MODE_SIZE (DImode) + offset))); 2085 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; 2086 add_reg_note (insn, REG_CFA_ADJUST_CFA, 2087 (gen_rtx_SET (Pmode, stack_pointer_rtx, 2088 plus_constant (Pmode, cfa_reg, 2089 offset)))); 2090 } 2091 2092 /* The first part of a frame-related parallel insn 2093 is always assumed to be relevant to the frame 2094 calculations; subsequent parts, are only 2095 frame-related if explicitly marked. */ 2096 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; 2097 RTX_FRAME_RELATED_P (insn) = 1; 2098 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); 2099 add_reg_note (insn, REG_CFA_RESTORE, 2100 gen_rtx_REG (DImode, LR_REGNUM)); 2101 2102 if (fp_offset) 2103 { 2104 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, 2105 GEN_INT (offset))); 2106 RTX_FRAME_RELATED_P (insn) = 1; 2107 } 2108 } 2109 else 2110 { 2111 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, 2112 GEN_INT (offset))); 2113 RTX_FRAME_RELATED_P (insn) = 1; 2114 } 2115 } 2116 2117 /* Stack adjustment for exception handler. */ 2118 if (crtl->calls_eh_return) 2119 { 2120 /* We need to unwind the stack by the offset computed by 2121 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is 2122 based on SP. Ideally we would update the SP and define the 2123 CFA along the lines of: 2124 2125 SP = SP + EH_RETURN_STACKADJ_RTX 2126 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX) 2127 2128 However the dwarf emitter only understands a constant 2129 register offset. 2130 2131 The solution choosen here is to use the otherwise unused IP0 2132 as a temporary register to hold the current SP value. The 2133 CFA is described using IP0 then SP is modified. */ 2134 2135 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM); 2136 2137 insn = emit_move_insn (ip0, stack_pointer_rtx); 2138 add_reg_note (insn, REG_CFA_DEF_CFA, ip0); 2139 RTX_FRAME_RELATED_P (insn) = 1; 2140 2141 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX)); 2142 2143 /* Ensure the assignment to IP0 does not get optimized away. 
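         In assembly terms the block above is roughly "mov x16, sp"
         followed by "add sp, sp, <adjustment register>", with the CFA
         temporarily described in terms of x16 (IP0); the emit_use below
         keeps that copy live until the return.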
*/ 2144 emit_use (ip0); 2145 } 2146 2147 if (frame_size > -1) 2148 { 2149 if (frame_size >= 0x1000000) 2150 { 2151 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); 2152 emit_move_insn (op0, GEN_INT (frame_size)); 2153 emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); 2154 aarch64_set_frame_expr (gen_rtx_SET 2155 (Pmode, stack_pointer_rtx, 2156 gen_rtx_PLUS (Pmode, 2157 stack_pointer_rtx, 2158 GEN_INT (frame_size)))); 2159 } 2160 else if (frame_size > 0) 2161 { 2162 if ((frame_size & 0xfff) != 0) 2163 { 2164 insn = emit_insn (gen_add2_insn 2165 (stack_pointer_rtx, 2166 GEN_INT ((frame_size 2167 & (HOST_WIDE_INT) 0xfff)))); 2168 RTX_FRAME_RELATED_P (insn) = 1; 2169 } 2170 if ((frame_size & 0xfff) != frame_size) 2171 { 2172 insn = emit_insn (gen_add2_insn 2173 (stack_pointer_rtx, 2174 GEN_INT ((frame_size 2175 & ~ (HOST_WIDE_INT) 0xfff)))); 2176 RTX_FRAME_RELATED_P (insn) = 1; 2177 } 2178 } 2179 2180 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx, 2181 gen_rtx_PLUS (Pmode, 2182 stack_pointer_rtx, 2183 GEN_INT (offset)))); 2184 } 2185 2186 emit_use (gen_rtx_REG (DImode, LR_REGNUM)); 2187 if (!for_sibcall) 2188 emit_jump_insn (ret_rtx); 2189 } 2190 2191 /* Return the place to copy the exception unwinding return address to. 2192 This will probably be a stack slot, but could (in theory be the 2193 return register). */ 2194 rtx 2195 aarch64_final_eh_return_addr (void) 2196 { 2197 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset; 2198 aarch64_layout_frame (); 2199 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; 2200 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size 2201 + crtl->outgoing_args_size); 2202 offset = frame_size = AARCH64_ROUND_UP (frame_size, 2203 STACK_BOUNDARY / BITS_PER_UNIT); 2204 fp_offset = offset 2205 - original_frame_size 2206 - cfun->machine->frame.saved_regs_size; 2207 2208 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0) 2209 return gen_rtx_REG (DImode, LR_REGNUM); 2210 2211 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can 2212 result in a store to save LR introduced by builtin_eh_return () being 2213 incorrectly deleted because the alias is not detected. 2214 So in the calculation of the address to copy the exception unwinding 2215 return address to, we note 2 cases. 2216 If FP is needed and the fp_offset is 0, it means that SP = FP and hence 2217 we return a SP-relative location since all the addresses are SP-relative 2218 in this case. This prevents the store from being optimized away. 2219 If the fp_offset is not 0, then the addresses will be FP-relative and 2220 therefore we return a FP-relative location. */ 2221 2222 if (frame_pointer_needed) 2223 { 2224 if (fp_offset) 2225 return gen_frame_mem (DImode, 2226 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD)); 2227 else 2228 return gen_frame_mem (DImode, 2229 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD)); 2230 } 2231 2232 /* If FP is not needed, we calculate the location of LR, which would be 2233 at the top of the saved registers block. */ 2234 2235 return gen_frame_mem (DImode, 2236 plus_constant (Pmode, 2237 stack_pointer_rtx, 2238 fp_offset 2239 + cfun->machine->frame.saved_regs_size 2240 - 2 * UNITS_PER_WORD)); 2241 } 2242 2243 /* Output code to build up a constant in a register. 
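   For example (value chosen purely for illustration), a constant such as
   0x0000123400005678 that is not a valid bitmask immediate is built as

     movz  x0, #0x5678
     movk  x0, #0x1234, lsl #32

   starting from MOVZ because most of its 16-bit chunks are zero; a value
   whose chunks are mostly 0xffff would instead start from MOVN and use
   MOVK only for the chunks that differ from all-ones.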
*/ 2244 static void 2245 aarch64_build_constant (int regnum, HOST_WIDE_INT val) 2246 { 2247 if (aarch64_bitmask_imm (val, DImode)) 2248 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); 2249 else 2250 { 2251 int i; 2252 int ncount = 0; 2253 int zcount = 0; 2254 HOST_WIDE_INT valp = val >> 16; 2255 HOST_WIDE_INT valm; 2256 HOST_WIDE_INT tval; 2257 2258 for (i = 16; i < 64; i += 16) 2259 { 2260 valm = (valp & 0xffff); 2261 2262 if (valm != 0) 2263 ++ zcount; 2264 2265 if (valm != 0xffff) 2266 ++ ncount; 2267 2268 valp >>= 16; 2269 } 2270 2271 /* zcount contains the number of additional MOVK instructions 2272 required if the constant is built up with an initial MOVZ instruction, 2273 while ncount is the number of MOVK instructions required if starting 2274 with a MOVN instruction. Choose the sequence that yields the fewest 2275 number of instructions, preferring MOVZ instructions when they are both 2276 the same. */ 2277 if (ncount < zcount) 2278 { 2279 emit_move_insn (gen_rtx_REG (Pmode, regnum), 2280 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); 2281 tval = 0xffff; 2282 } 2283 else 2284 { 2285 emit_move_insn (gen_rtx_REG (Pmode, regnum), 2286 GEN_INT (val & 0xffff)); 2287 tval = 0; 2288 } 2289 2290 val >>= 16; 2291 2292 for (i = 16; i < 64; i += 16) 2293 { 2294 if ((val & 0xffff) != tval) 2295 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), 2296 GEN_INT (i), GEN_INT (val & 0xffff))); 2297 val >>= 16; 2298 } 2299 } 2300 } 2301 2302 static void 2303 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta) 2304 { 2305 HOST_WIDE_INT mdelta = delta; 2306 rtx this_rtx = gen_rtx_REG (Pmode, regnum); 2307 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg); 2308 2309 if (mdelta < 0) 2310 mdelta = -mdelta; 2311 2312 if (mdelta >= 4096 * 4096) 2313 { 2314 aarch64_build_constant (scratchreg, delta); 2315 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx)); 2316 } 2317 else if (mdelta > 0) 2318 { 2319 if (mdelta >= 4096) 2320 { 2321 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096))); 2322 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12)); 2323 if (delta < 0) 2324 emit_insn (gen_rtx_SET (Pmode, this_rtx, 2325 gen_rtx_MINUS (Pmode, this_rtx, shift))); 2326 else 2327 emit_insn (gen_rtx_SET (Pmode, this_rtx, 2328 gen_rtx_PLUS (Pmode, this_rtx, shift))); 2329 } 2330 if (mdelta % 4096 != 0) 2331 { 2332 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096)); 2333 emit_insn (gen_rtx_SET (Pmode, this_rtx, 2334 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx))); 2335 } 2336 } 2337 } 2338 2339 /* Output code to add DELTA to the first argument, and then jump 2340 to FUNCTION. Used for C++ multiple inheritance. */ 2341 static void 2342 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, 2343 HOST_WIDE_INT delta, 2344 HOST_WIDE_INT vcall_offset, 2345 tree function) 2346 { 2347 /* The this pointer is always in x0. Note that this differs from 2348 Arm where the this pointer maybe bumped to r1 if r0 is required 2349 to return a pointer to an aggregate. On AArch64 a result value 2350 pointer will be in x8. 
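      For a thunk with a small positive DELTA and no vcall offset this
      reduces to roughly "add x0, x0, #<delta>" followed by a tail call
      "b <function>"; the virtual-call path below additionally loads the
      vtable slot through the IP0/IP1 scratch registers (x16/x17).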
*/ 2351 int this_regno = R0_REGNUM; 2352 rtx this_rtx, temp0, temp1, addr, insn, funexp; 2353 2354 reload_completed = 1; 2355 emit_note (NOTE_INSN_PROLOGUE_END); 2356 2357 if (vcall_offset == 0) 2358 aarch64_add_constant (this_regno, IP1_REGNUM, delta); 2359 else 2360 { 2361 gcc_assert ((vcall_offset & 0x7) == 0); 2362 2363 this_rtx = gen_rtx_REG (Pmode, this_regno); 2364 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM); 2365 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM); 2366 2367 addr = this_rtx; 2368 if (delta != 0) 2369 { 2370 if (delta >= -256 && delta < 256) 2371 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx, 2372 plus_constant (Pmode, this_rtx, delta)); 2373 else 2374 aarch64_add_constant (this_regno, IP1_REGNUM, delta); 2375 } 2376 2377 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr)); 2378 2379 if (vcall_offset >= -256 && vcall_offset < 32768) 2380 addr = plus_constant (Pmode, temp0, vcall_offset); 2381 else 2382 { 2383 aarch64_build_constant (IP1_REGNUM, vcall_offset); 2384 addr = gen_rtx_PLUS (Pmode, temp0, temp1); 2385 } 2386 2387 aarch64_emit_move (temp1, gen_rtx_MEM (Pmode,addr)); 2388 emit_insn (gen_add2_insn (this_rtx, temp1)); 2389 } 2390 2391 /* Generate a tail call to the target function. */ 2392 if (!TREE_USED (function)) 2393 { 2394 assemble_external (function); 2395 TREE_USED (function) = 1; 2396 } 2397 funexp = XEXP (DECL_RTL (function), 0); 2398 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); 2399 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX)); 2400 SIBLING_CALL_P (insn) = 1; 2401 2402 insn = get_insns (); 2403 shorten_branches (insn); 2404 final_start_function (insn, file, 1); 2405 final (insn, file, 1); 2406 final_end_function (); 2407 2408 /* Stop pretending to be a post-reload pass. */ 2409 reload_completed = 0; 2410 } 2411 2412 static int 2413 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED) 2414 { 2415 if (GET_CODE (*x) == SYMBOL_REF) 2416 return SYMBOL_REF_TLS_MODEL (*x) != 0; 2417 2418 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are 2419 TLS offsets, not real symbol references. 
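      Returning -1 from a for_each_rtx callback means "do not walk into
      the sub-expressions of this rtx", which is exactly what we want for
      the UNSPEC_TLS wrapper; returning 0 keeps the normal traversal.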
*/ 2420 if (GET_CODE (*x) == UNSPEC 2421 && XINT (*x, 1) == UNSPEC_TLS) 2422 return -1; 2423 2424 return 0; 2425 } 2426 2427 static bool 2428 aarch64_tls_referenced_p (rtx x) 2429 { 2430 if (!TARGET_HAVE_TLS) 2431 return false; 2432 2433 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL); 2434 } 2435 2436 2437 static int 2438 aarch64_bitmasks_cmp (const void *i1, const void *i2) 2439 { 2440 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1; 2441 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2; 2442 2443 if (*imm1 < *imm2) 2444 return -1; 2445 if (*imm1 > *imm2) 2446 return +1; 2447 return 0; 2448 } 2449 2450 2451 static void 2452 aarch64_build_bitmask_table (void) 2453 { 2454 unsigned HOST_WIDE_INT mask, imm; 2455 unsigned int log_e, e, s, r; 2456 unsigned int nimms = 0; 2457 2458 for (log_e = 1; log_e <= 6; log_e++) 2459 { 2460 e = 1 << log_e; 2461 if (e == 64) 2462 mask = ~(HOST_WIDE_INT) 0; 2463 else 2464 mask = ((HOST_WIDE_INT) 1 << e) - 1; 2465 for (s = 1; s < e; s++) 2466 { 2467 for (r = 0; r < e; r++) 2468 { 2469 /* set s consecutive bits to 1 (s < 64) */ 2470 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1; 2471 /* rotate right by r */ 2472 if (r != 0) 2473 imm = ((imm >> r) | (imm << (e - r))) & mask; 2474 /* replicate the constant depending on SIMD size */ 2475 switch (log_e) { 2476 case 1: imm |= (imm << 2); 2477 case 2: imm |= (imm << 4); 2478 case 3: imm |= (imm << 8); 2479 case 4: imm |= (imm << 16); 2480 case 5: imm |= (imm << 32); 2481 case 6: 2482 break; 2483 default: 2484 gcc_unreachable (); 2485 } 2486 gcc_assert (nimms < AARCH64_NUM_BITMASKS); 2487 aarch64_bitmasks[nimms++] = imm; 2488 } 2489 } 2490 } 2491 2492 gcc_assert (nimms == AARCH64_NUM_BITMASKS); 2493 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]), 2494 aarch64_bitmasks_cmp); 2495 } 2496 2497 2498 /* Return true if val can be encoded as a 12-bit unsigned immediate with 2499 a left shift of 0 or 12 bits. */ 2500 bool 2501 aarch64_uimm12_shift (HOST_WIDE_INT val) 2502 { 2503 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val 2504 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val 2505 ); 2506 } 2507 2508 2509 /* Return true if val is an immediate that can be loaded into a 2510 register by a MOVZ instruction. */ 2511 static bool 2512 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode) 2513 { 2514 if (GET_MODE_SIZE (mode) > 4) 2515 { 2516 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val 2517 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) 2518 return 1; 2519 } 2520 else 2521 { 2522 /* Ignore sign extension. */ 2523 val &= (HOST_WIDE_INT) 0xffffffff; 2524 } 2525 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val 2526 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); 2527 } 2528 2529 2530 /* Return true if val is a valid bitmask immediate. */ 2531 bool 2532 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode) 2533 { 2534 if (GET_MODE_SIZE (mode) < 8) 2535 { 2536 /* Replicate bit pattern. */ 2537 val &= (HOST_WIDE_INT) 0xffffffff; 2538 val |= val << 32; 2539 } 2540 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS, 2541 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL; 2542 } 2543 2544 2545 /* Return true if val is an immediate that can be loaded into a 2546 register in a single instruction. 
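   Informally: MOVZ handles values with a single non-zero 16-bit chunk,
   MOVN (checked via ~val below) handles values where a single chunk
   differs from all-ones, and the sorted bitmask table handles replicated
   rotated bit patterns such as 0x00ff00ff00ff00ff (examples, not an
   exhaustive description of the encodings).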
*/ 2547 bool 2548 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode) 2549 { 2550 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode)) 2551 return 1; 2552 return aarch64_bitmask_imm (val, mode); 2553 } 2554 2555 static bool 2556 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) 2557 { 2558 rtx base, offset; 2559 if (GET_CODE (x) == HIGH) 2560 return true; 2561 2562 split_const (x, &base, &offset); 2563 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF) 2564 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM); 2565 2566 return aarch64_tls_referenced_p (x); 2567 } 2568 2569 /* Return true if register REGNO is a valid index register. 2570 STRICT_P is true if REG_OK_STRICT is in effect. */ 2571 2572 bool 2573 aarch64_regno_ok_for_index_p (int regno, bool strict_p) 2574 { 2575 if (!HARD_REGISTER_NUM_P (regno)) 2576 { 2577 if (!strict_p) 2578 return true; 2579 2580 if (!reg_renumber) 2581 return false; 2582 2583 regno = reg_renumber[regno]; 2584 } 2585 return GP_REGNUM_P (regno); 2586 } 2587 2588 /* Return true if register REGNO is a valid base register for mode MODE. 2589 STRICT_P is true if REG_OK_STRICT is in effect. */ 2590 2591 bool 2592 aarch64_regno_ok_for_base_p (int regno, bool strict_p) 2593 { 2594 if (!HARD_REGISTER_NUM_P (regno)) 2595 { 2596 if (!strict_p) 2597 return true; 2598 2599 if (!reg_renumber) 2600 return false; 2601 2602 regno = reg_renumber[regno]; 2603 } 2604 2605 /* The fake registers will be eliminated to either the stack or 2606 hard frame pointer, both of which are usually valid base registers. 2607 Reload deals with the cases where the eliminated form isn't valid. */ 2608 return (GP_REGNUM_P (regno) 2609 || regno == SP_REGNUM 2610 || regno == FRAME_POINTER_REGNUM 2611 || regno == ARG_POINTER_REGNUM); 2612 } 2613 2614 /* Return true if X is a valid base register for mode MODE. 2615 STRICT_P is true if REG_OK_STRICT is in effect. */ 2616 2617 static bool 2618 aarch64_base_register_rtx_p (rtx x, bool strict_p) 2619 { 2620 if (!strict_p && GET_CODE (x) == SUBREG) 2621 x = SUBREG_REG (x); 2622 2623 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p)); 2624 } 2625 2626 /* Return true if address offset is a valid index. If it is, fill in INFO 2627 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */ 2628 2629 static bool 2630 aarch64_classify_index (struct aarch64_address_info *info, rtx x, 2631 enum machine_mode mode, bool strict_p) 2632 { 2633 enum aarch64_address_type type; 2634 rtx index; 2635 int shift; 2636 2637 /* (reg:P) */ 2638 if ((REG_P (x) || GET_CODE (x) == SUBREG) 2639 && GET_MODE (x) == Pmode) 2640 { 2641 type = ADDRESS_REG_REG; 2642 index = x; 2643 shift = 0; 2644 } 2645 /* (sign_extend:DI (reg:SI)) */ 2646 else if ((GET_CODE (x) == SIGN_EXTEND 2647 || GET_CODE (x) == ZERO_EXTEND) 2648 && GET_MODE (x) == DImode 2649 && GET_MODE (XEXP (x, 0)) == SImode) 2650 { 2651 type = (GET_CODE (x) == SIGN_EXTEND) 2652 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; 2653 index = XEXP (x, 0); 2654 shift = 0; 2655 } 2656 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */ 2657 else if (GET_CODE (x) == MULT 2658 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND 2659 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND) 2660 && GET_MODE (XEXP (x, 0)) == DImode 2661 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode 2662 && CONST_INT_P (XEXP (x, 1))) 2663 { 2664 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) 2665 ? 
ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; 2666 index = XEXP (XEXP (x, 0), 0); 2667 shift = exact_log2 (INTVAL (XEXP (x, 1))); 2668 } 2669 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */ 2670 else if (GET_CODE (x) == ASHIFT 2671 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND 2672 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND) 2673 && GET_MODE (XEXP (x, 0)) == DImode 2674 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode 2675 && CONST_INT_P (XEXP (x, 1))) 2676 { 2677 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) 2678 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; 2679 index = XEXP (XEXP (x, 0), 0); 2680 shift = INTVAL (XEXP (x, 1)); 2681 } 2682 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */ 2683 else if ((GET_CODE (x) == SIGN_EXTRACT 2684 || GET_CODE (x) == ZERO_EXTRACT) 2685 && GET_MODE (x) == DImode 2686 && GET_CODE (XEXP (x, 0)) == MULT 2687 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode 2688 && CONST_INT_P (XEXP (XEXP (x, 0), 1))) 2689 { 2690 type = (GET_CODE (x) == SIGN_EXTRACT) 2691 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; 2692 index = XEXP (XEXP (x, 0), 0); 2693 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))); 2694 if (INTVAL (XEXP (x, 1)) != 32 + shift 2695 || INTVAL (XEXP (x, 2)) != 0) 2696 shift = -1; 2697 } 2698 /* (and:DI (mult:DI (reg:DI) (const_int scale)) 2699 (const_int 0xffffffff<<shift)) */ 2700 else if (GET_CODE (x) == AND 2701 && GET_MODE (x) == DImode 2702 && GET_CODE (XEXP (x, 0)) == MULT 2703 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode 2704 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 2705 && CONST_INT_P (XEXP (x, 1))) 2706 { 2707 type = ADDRESS_REG_UXTW; 2708 index = XEXP (XEXP (x, 0), 0); 2709 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))); 2710 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift) 2711 shift = -1; 2712 } 2713 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */ 2714 else if ((GET_CODE (x) == SIGN_EXTRACT 2715 || GET_CODE (x) == ZERO_EXTRACT) 2716 && GET_MODE (x) == DImode 2717 && GET_CODE (XEXP (x, 0)) == ASHIFT 2718 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode 2719 && CONST_INT_P (XEXP (XEXP (x, 0), 1))) 2720 { 2721 type = (GET_CODE (x) == SIGN_EXTRACT) 2722 ? 
ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; 2723 index = XEXP (XEXP (x, 0), 0); 2724 shift = INTVAL (XEXP (XEXP (x, 0), 1)); 2725 if (INTVAL (XEXP (x, 1)) != 32 + shift 2726 || INTVAL (XEXP (x, 2)) != 0) 2727 shift = -1; 2728 } 2729 /* (and:DI (ashift:DI (reg:DI) (const_int shift)) 2730 (const_int 0xffffffff<<shift)) */ 2731 else if (GET_CODE (x) == AND 2732 && GET_MODE (x) == DImode 2733 && GET_CODE (XEXP (x, 0)) == ASHIFT 2734 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode 2735 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 2736 && CONST_INT_P (XEXP (x, 1))) 2737 { 2738 type = ADDRESS_REG_UXTW; 2739 index = XEXP (XEXP (x, 0), 0); 2740 shift = INTVAL (XEXP (XEXP (x, 0), 1)); 2741 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift) 2742 shift = -1; 2743 } 2744 /* (mult:P (reg:P) (const_int scale)) */ 2745 else if (GET_CODE (x) == MULT 2746 && GET_MODE (x) == Pmode 2747 && GET_MODE (XEXP (x, 0)) == Pmode 2748 && CONST_INT_P (XEXP (x, 1))) 2749 { 2750 type = ADDRESS_REG_REG; 2751 index = XEXP (x, 0); 2752 shift = exact_log2 (INTVAL (XEXP (x, 1))); 2753 } 2754 /* (ashift:P (reg:P) (const_int shift)) */ 2755 else if (GET_CODE (x) == ASHIFT 2756 && GET_MODE (x) == Pmode 2757 && GET_MODE (XEXP (x, 0)) == Pmode 2758 && CONST_INT_P (XEXP (x, 1))) 2759 { 2760 type = ADDRESS_REG_REG; 2761 index = XEXP (x, 0); 2762 shift = INTVAL (XEXP (x, 1)); 2763 } 2764 else 2765 return false; 2766 2767 if (GET_CODE (index) == SUBREG) 2768 index = SUBREG_REG (index); 2769 2770 if ((shift == 0 || 2771 (shift > 0 && shift <= 3 2772 && (1 << shift) == GET_MODE_SIZE (mode))) 2773 && REG_P (index) 2774 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p)) 2775 { 2776 info->type = type; 2777 info->offset = index; 2778 info->shift = shift; 2779 return true; 2780 } 2781 2782 return false; 2783 } 2784 2785 static inline bool 2786 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset) 2787 { 2788 return (offset >= -64 * GET_MODE_SIZE (mode) 2789 && offset < 64 * GET_MODE_SIZE (mode) 2790 && offset % GET_MODE_SIZE (mode) == 0); 2791 } 2792 2793 static inline bool 2794 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED, 2795 HOST_WIDE_INT offset) 2796 { 2797 return offset >= -256 && offset < 256; 2798 } 2799 2800 static inline bool 2801 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset) 2802 { 2803 return (offset >= 0 2804 && offset < 4096 * GET_MODE_SIZE (mode) 2805 && offset % GET_MODE_SIZE (mode) == 0); 2806 } 2807 2808 /* Return true if X is a valid address for machine mode MODE. If it is, 2809 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in 2810 effect. OUTER_CODE is PARALLEL for a load/store pair. */ 2811 2812 static bool 2813 aarch64_classify_address (struct aarch64_address_info *info, 2814 rtx x, enum machine_mode mode, 2815 RTX_CODE outer_code, bool strict_p) 2816 { 2817 enum rtx_code code = GET_CODE (x); 2818 rtx op0, op1; 2819 bool allow_reg_index_p = 2820 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16; 2821 2822 /* Don't support anything other than POST_INC or REG addressing for 2823 AdvSIMD. 
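      In other words, for vector modes only (mem (reg)) and
      (mem (post_inc (reg))) are accepted, matching the "[xN]" and
      "[xN], #size" forms used by the structure load/store instructions;
      base-plus-offset and indexed addresses are rejected here.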
*/ 2824 if (aarch64_vector_mode_p (mode) 2825 && (code != POST_INC && code != REG)) 2826 return false; 2827 2828 switch (code) 2829 { 2830 case REG: 2831 case SUBREG: 2832 info->type = ADDRESS_REG_IMM; 2833 info->base = x; 2834 info->offset = const0_rtx; 2835 return aarch64_base_register_rtx_p (x, strict_p); 2836 2837 case PLUS: 2838 op0 = XEXP (x, 0); 2839 op1 = XEXP (x, 1); 2840 if (GET_MODE_SIZE (mode) != 0 2841 && CONST_INT_P (op1) 2842 && aarch64_base_register_rtx_p (op0, strict_p)) 2843 { 2844 HOST_WIDE_INT offset = INTVAL (op1); 2845 2846 info->type = ADDRESS_REG_IMM; 2847 info->base = op0; 2848 info->offset = op1; 2849 2850 /* TImode and TFmode values are allowed in both pairs of X 2851 registers and individual Q registers. The available 2852 address modes are: 2853 X,X: 7-bit signed scaled offset 2854 Q: 9-bit signed offset 2855 We conservatively require an offset representable in either mode. 2856 */ 2857 if (mode == TImode || mode == TFmode) 2858 return (offset_7bit_signed_scaled_p (mode, offset) 2859 && offset_9bit_signed_unscaled_p (mode, offset)); 2860 2861 if (outer_code == PARALLEL) 2862 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) 2863 && offset_7bit_signed_scaled_p (mode, offset)); 2864 else 2865 return (offset_9bit_signed_unscaled_p (mode, offset) 2866 || offset_12bit_unsigned_scaled_p (mode, offset)); 2867 } 2868 2869 if (allow_reg_index_p) 2870 { 2871 /* Look for base + (scaled/extended) index register. */ 2872 if (aarch64_base_register_rtx_p (op0, strict_p) 2873 && aarch64_classify_index (info, op1, mode, strict_p)) 2874 { 2875 info->base = op0; 2876 return true; 2877 } 2878 if (aarch64_base_register_rtx_p (op1, strict_p) 2879 && aarch64_classify_index (info, op0, mode, strict_p)) 2880 { 2881 info->base = op1; 2882 return true; 2883 } 2884 } 2885 2886 return false; 2887 2888 case POST_INC: 2889 case POST_DEC: 2890 case PRE_INC: 2891 case PRE_DEC: 2892 info->type = ADDRESS_REG_WB; 2893 info->base = XEXP (x, 0); 2894 info->offset = NULL_RTX; 2895 return aarch64_base_register_rtx_p (info->base, strict_p); 2896 2897 case POST_MODIFY: 2898 case PRE_MODIFY: 2899 info->type = ADDRESS_REG_WB; 2900 info->base = XEXP (x, 0); 2901 if (GET_CODE (XEXP (x, 1)) == PLUS 2902 && CONST_INT_P (XEXP (XEXP (x, 1), 1)) 2903 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base) 2904 && aarch64_base_register_rtx_p (info->base, strict_p)) 2905 { 2906 HOST_WIDE_INT offset; 2907 info->offset = XEXP (XEXP (x, 1), 1); 2908 offset = INTVAL (info->offset); 2909 2910 /* TImode and TFmode values are allowed in both pairs of X 2911 registers and individual Q registers. The available 2912 address modes are: 2913 X,X: 7-bit signed scaled offset 2914 Q: 9-bit signed offset 2915 We conservatively require an offset representable in either mode. 2916 */ 2917 if (mode == TImode || mode == TFmode) 2918 return (offset_7bit_signed_scaled_p (mode, offset) 2919 && offset_9bit_signed_unscaled_p (mode, offset)); 2920 2921 if (outer_code == PARALLEL) 2922 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) 2923 && offset_7bit_signed_scaled_p (mode, offset)); 2924 else 2925 return offset_9bit_signed_unscaled_p (mode, offset); 2926 } 2927 return false; 2928 2929 case CONST: 2930 case SYMBOL_REF: 2931 case LABEL_REF: 2932 /* load literal: pc-relative constant pool entry. Only supported 2933 for SI mode or larger. 
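      This is the address form that ends up as an "ldr wN, .LCn" or
      "ldr xN, .LCn" style literal load; anything smaller than SImode,
      and load/store pairs, must use one of the register-based forms
      handled above.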
*/ 2934 info->type = ADDRESS_SYMBOLIC; 2935 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4) 2936 { 2937 rtx sym, addend; 2938 2939 split_const (x, &sym, &addend); 2940 return (GET_CODE (sym) == LABEL_REF 2941 || (GET_CODE (sym) == SYMBOL_REF 2942 && CONSTANT_POOL_ADDRESS_P (sym))); 2943 } 2944 return false; 2945 2946 case LO_SUM: 2947 info->type = ADDRESS_LO_SUM; 2948 info->base = XEXP (x, 0); 2949 info->offset = XEXP (x, 1); 2950 if (allow_reg_index_p 2951 && aarch64_base_register_rtx_p (info->base, strict_p)) 2952 { 2953 rtx sym, offs; 2954 split_const (info->offset, &sym, &offs); 2955 if (GET_CODE (sym) == SYMBOL_REF 2956 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM) 2957 == SYMBOL_SMALL_ABSOLUTE)) 2958 { 2959 /* The symbol and offset must be aligned to the access size. */ 2960 unsigned int align; 2961 unsigned int ref_size; 2962 2963 if (CONSTANT_POOL_ADDRESS_P (sym)) 2964 align = GET_MODE_ALIGNMENT (get_pool_mode (sym)); 2965 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym)) 2966 { 2967 tree exp = SYMBOL_REF_DECL (sym); 2968 align = TYPE_ALIGN (TREE_TYPE (exp)); 2969 align = CONSTANT_ALIGNMENT (exp, align); 2970 } 2971 else if (SYMBOL_REF_DECL (sym)) 2972 align = DECL_ALIGN (SYMBOL_REF_DECL (sym)); 2973 else 2974 align = BITS_PER_UNIT; 2975 2976 ref_size = GET_MODE_SIZE (mode); 2977 if (ref_size == 0) 2978 ref_size = GET_MODE_SIZE (DImode); 2979 2980 return ((INTVAL (offs) & (ref_size - 1)) == 0 2981 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0); 2982 } 2983 } 2984 return false; 2985 2986 default: 2987 return false; 2988 } 2989 } 2990 2991 bool 2992 aarch64_symbolic_address_p (rtx x) 2993 { 2994 rtx offset; 2995 2996 split_const (x, &x, &offset); 2997 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF; 2998 } 2999 3000 /* Classify the base of symbolic expression X, given that X appears in 3001 context CONTEXT. */ 3002 static enum aarch64_symbol_type 3003 aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context) 3004 { 3005 rtx offset; 3006 split_const (x, &x, &offset); 3007 return aarch64_classify_symbol (x, context); 3008 } 3009 3010 3011 /* Return TRUE if X is a legitimate address for accessing memory in 3012 mode MODE. */ 3013 static bool 3014 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p) 3015 { 3016 struct aarch64_address_info addr; 3017 3018 return aarch64_classify_address (&addr, x, mode, MEM, strict_p); 3019 } 3020 3021 /* Return TRUE if X is a legitimate address for accessing memory in 3022 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store 3023 pair operation. */ 3024 bool 3025 aarch64_legitimate_address_p (enum machine_mode mode, rtx x, 3026 RTX_CODE outer_code, bool strict_p) 3027 { 3028 struct aarch64_address_info addr; 3029 3030 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p); 3031 } 3032 3033 /* Return TRUE if rtx X is immediate constant 0.0 */ 3034 bool 3035 aarch64_float_const_zero_rtx_p (rtx x) 3036 { 3037 REAL_VALUE_TYPE r; 3038 3039 if (GET_MODE (x) == VOIDmode) 3040 return false; 3041 3042 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 3043 if (REAL_VALUE_MINUS_ZERO (r)) 3044 return !HONOR_SIGNED_ZEROS (GET_MODE (x)); 3045 return REAL_VALUES_EQUAL (r, dconst0); 3046 } 3047 3048 /* Return the fixed registers used for condition codes. 
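   AArch64 has a single flags register (NZCV), so only CC_REGNUM is
   reported and the second slot is set to INVALID_REGNUM.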
*/ 3049 3050 static bool 3051 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) 3052 { 3053 *p1 = CC_REGNUM; 3054 *p2 = INVALID_REGNUM; 3055 return true; 3056 } 3057 3058 enum machine_mode 3059 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) 3060 { 3061 /* All floating point compares return CCFP if it is an equality 3062 comparison, and CCFPE otherwise. */ 3063 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 3064 { 3065 switch (code) 3066 { 3067 case EQ: 3068 case NE: 3069 case UNORDERED: 3070 case ORDERED: 3071 case UNLT: 3072 case UNLE: 3073 case UNGT: 3074 case UNGE: 3075 case UNEQ: 3076 case LTGT: 3077 return CCFPmode; 3078 3079 case LT: 3080 case LE: 3081 case GT: 3082 case GE: 3083 return CCFPEmode; 3084 3085 default: 3086 gcc_unreachable (); 3087 } 3088 } 3089 3090 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) 3091 && y == const0_rtx 3092 && (code == EQ || code == NE || code == LT || code == GE) 3093 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND)) 3094 return CC_NZmode; 3095 3096 /* A compare with a shifted operand. Because of canonicalization, 3097 the comparison will have to be swapped when we emit the assembly 3098 code. */ 3099 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) 3100 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG) 3101 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT 3102 || GET_CODE (x) == LSHIFTRT 3103 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)) 3104 return CC_SWPmode; 3105 3106 /* A compare of a mode narrower than SI mode against zero can be done 3107 by extending the value in the comparison. */ 3108 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode) 3109 && y == const0_rtx) 3110 /* Only use sign-extension if we really need it. */ 3111 return ((code == GT || code == GE || code == LE || code == LT) 3112 ? CC_SESWPmode : CC_ZESWPmode); 3113 3114 /* For everything else, return CCmode. 
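   As an illustration of the swapped case above: (compare (ashift x 2) y)
   has to be emitted with the shifted operand as the second source, e.g.
   "cmp y, x, lsl #2", so aarch64_get_condition_code maps conditions on
   CC_SWPmode to their swapped forms (GE becomes LE, and so on).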
*/ 3115 return CCmode; 3116 } 3117 3118 static unsigned 3119 aarch64_get_condition_code (rtx x) 3120 { 3121 enum machine_mode mode = GET_MODE (XEXP (x, 0)); 3122 enum rtx_code comp_code = GET_CODE (x); 3123 3124 if (GET_MODE_CLASS (mode) != MODE_CC) 3125 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1)); 3126 3127 switch (mode) 3128 { 3129 case CCFPmode: 3130 case CCFPEmode: 3131 switch (comp_code) 3132 { 3133 case GE: return AARCH64_GE; 3134 case GT: return AARCH64_GT; 3135 case LE: return AARCH64_LS; 3136 case LT: return AARCH64_MI; 3137 case NE: return AARCH64_NE; 3138 case EQ: return AARCH64_EQ; 3139 case ORDERED: return AARCH64_VC; 3140 case UNORDERED: return AARCH64_VS; 3141 case UNLT: return AARCH64_LT; 3142 case UNLE: return AARCH64_LE; 3143 case UNGT: return AARCH64_HI; 3144 case UNGE: return AARCH64_PL; 3145 default: gcc_unreachable (); 3146 } 3147 break; 3148 3149 case CCmode: 3150 switch (comp_code) 3151 { 3152 case NE: return AARCH64_NE; 3153 case EQ: return AARCH64_EQ; 3154 case GE: return AARCH64_GE; 3155 case GT: return AARCH64_GT; 3156 case LE: return AARCH64_LE; 3157 case LT: return AARCH64_LT; 3158 case GEU: return AARCH64_CS; 3159 case GTU: return AARCH64_HI; 3160 case LEU: return AARCH64_LS; 3161 case LTU: return AARCH64_CC; 3162 default: gcc_unreachable (); 3163 } 3164 break; 3165 3166 case CC_SWPmode: 3167 case CC_ZESWPmode: 3168 case CC_SESWPmode: 3169 switch (comp_code) 3170 { 3171 case NE: return AARCH64_NE; 3172 case EQ: return AARCH64_EQ; 3173 case GE: return AARCH64_LE; 3174 case GT: return AARCH64_LT; 3175 case LE: return AARCH64_GE; 3176 case LT: return AARCH64_GT; 3177 case GEU: return AARCH64_LS; 3178 case GTU: return AARCH64_CC; 3179 case LEU: return AARCH64_CS; 3180 case LTU: return AARCH64_HI; 3181 default: gcc_unreachable (); 3182 } 3183 break; 3184 3185 case CC_NZmode: 3186 switch (comp_code) 3187 { 3188 case NE: return AARCH64_NE; 3189 case EQ: return AARCH64_EQ; 3190 case GE: return AARCH64_PL; 3191 case LT: return AARCH64_MI; 3192 default: gcc_unreachable (); 3193 } 3194 break; 3195 3196 default: 3197 gcc_unreachable (); 3198 break; 3199 } 3200 } 3201 3202 static unsigned 3203 bit_count (unsigned HOST_WIDE_INT value) 3204 { 3205 unsigned count = 0; 3206 3207 while (value) 3208 { 3209 count++; 3210 value &= value - 1; 3211 } 3212 3213 return count; 3214 } 3215 3216 void 3217 aarch64_print_operand (FILE *f, rtx x, char code) 3218 { 3219 switch (code) 3220 { 3221 case 'e': 3222 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */ 3223 { 3224 int n; 3225 3226 if (GET_CODE (x) != CONST_INT 3227 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0) 3228 { 3229 output_operand_lossage ("invalid operand for '%%%c'", code); 3230 return; 3231 } 3232 3233 switch (n) 3234 { 3235 case 3: 3236 fputc ('b', f); 3237 break; 3238 case 4: 3239 fputc ('h', f); 3240 break; 3241 case 5: 3242 fputc ('w', f); 3243 break; 3244 default: 3245 output_operand_lossage ("invalid operand for '%%%c'", code); 3246 return; 3247 } 3248 } 3249 break; 3250 3251 case 'p': 3252 { 3253 int n; 3254 3255 /* Print N such that 2^N == X. */ 3256 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0) 3257 { 3258 output_operand_lossage ("invalid operand for '%%%c'", code); 3259 return; 3260 } 3261 3262 asm_fprintf (f, "%d", n); 3263 } 3264 break; 3265 3266 case 'P': 3267 /* Print the number of non-zero bits in X (a const_int). 
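      For instance, "%P" applied to (const_int 0xf0) prints "4"; see
      bit_count above for the population-count helper this relies on.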
*/ 3268 if (GET_CODE (x) != CONST_INT) 3269 { 3270 output_operand_lossage ("invalid operand for '%%%c'", code); 3271 return; 3272 } 3273 3274 asm_fprintf (f, "%u", bit_count (INTVAL (x))); 3275 break; 3276 3277 case 'H': 3278 /* Print the higher numbered register of a pair (TImode) of regs. */ 3279 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1)) 3280 { 3281 output_operand_lossage ("invalid operand for '%%%c'", code); 3282 return; 3283 } 3284 3285 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]); 3286 break; 3287 3288 case 'Q': 3289 /* Print the least significant register of a pair (TImode) of regs. */ 3290 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1)) 3291 { 3292 output_operand_lossage ("invalid operand for '%%%c'", code); 3293 return; 3294 } 3295 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]); 3296 break; 3297 3298 case 'R': 3299 /* Print the most significant register of a pair (TImode) of regs. */ 3300 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1)) 3301 { 3302 output_operand_lossage ("invalid operand for '%%%c'", code); 3303 return; 3304 } 3305 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]); 3306 break; 3307 3308 case 'm': 3309 /* Print a condition (eq, ne, etc). */ 3310 3311 /* CONST_TRUE_RTX means always -- that's the default. */ 3312 if (x == const_true_rtx) 3313 return; 3314 3315 if (!COMPARISON_P (x)) 3316 { 3317 output_operand_lossage ("invalid operand for '%%%c'", code); 3318 return; 3319 } 3320 3321 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f); 3322 break; 3323 3324 case 'M': 3325 /* Print the inverse of a condition (eq <-> ne, etc). */ 3326 3327 /* CONST_TRUE_RTX means never -- that's the default. */ 3328 if (x == const_true_rtx) 3329 { 3330 fputs ("nv", f); 3331 return; 3332 } 3333 3334 if (!COMPARISON_P (x)) 3335 { 3336 output_operand_lossage ("invalid operand for '%%%c'", code); 3337 return; 3338 } 3339 3340 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE 3341 (aarch64_get_condition_code (x))], f); 3342 break; 3343 3344 case 'b': 3345 case 'h': 3346 case 's': 3347 case 'd': 3348 case 'q': 3349 /* Print a scalar FP/SIMD register name. */ 3350 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) 3351 { 3352 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); 3353 return; 3354 } 3355 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM); 3356 break; 3357 3358 case 'S': 3359 case 'T': 3360 case 'U': 3361 case 'V': 3362 /* Print the first FP/SIMD register name in a list. */ 3363 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) 3364 { 3365 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); 3366 return; 3367 } 3368 asm_fprintf (f, "%sv%d", REGISTER_PREFIX, 3369 REGNO (x) - V0_REGNUM + (code - 'S')); 3370 break; 3371 3372 case 'X': 3373 /* Print integer constant in hex. */ 3374 if (GET_CODE (x) != CONST_INT) 3375 { 3376 output_operand_lossage ("invalid operand for '%%%c'", code); 3377 return; 3378 } 3379 asm_fprintf (f, "0x%wx", UINTVAL (x)); 3380 break; 3381 3382 case 'w': 3383 case 'x': 3384 /* Print a general register name or the zero register (32-bit or 3385 64-bit). 
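      For example, "%w0" on (reg x3) prints "w3", "%x0" prints "x3",
      a zero operand prints as "wzr" or "xzr", and the stack pointer
      prints as "wsp" or "sp" depending on the width code.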
*/ 3386 if (x == const0_rtx 3387 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x))) 3388 { 3389 asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code); 3390 break; 3391 } 3392 3393 if (REG_P (x) && GP_REGNUM_P (REGNO (x))) 3394 { 3395 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, 3396 REGNO (x) - R0_REGNUM); 3397 break; 3398 } 3399 3400 if (REG_P (x) && REGNO (x) == SP_REGNUM) 3401 { 3402 asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : ""); 3403 break; 3404 } 3405 3406 /* Fall through */ 3407 3408 case 0: 3409 /* Print a normal operand, if it's a general register, then we 3410 assume DImode. */ 3411 if (x == NULL) 3412 { 3413 output_operand_lossage ("missing operand"); 3414 return; 3415 } 3416 3417 switch (GET_CODE (x)) 3418 { 3419 case REG: 3420 asm_fprintf (f, "%s", reg_names [REGNO (x)]); 3421 break; 3422 3423 case MEM: 3424 aarch64_memory_reference_mode = GET_MODE (x); 3425 output_address (XEXP (x, 0)); 3426 break; 3427 3428 case LABEL_REF: 3429 case SYMBOL_REF: 3430 output_addr_const (asm_out_file, x); 3431 break; 3432 3433 case CONST_INT: 3434 asm_fprintf (f, "%wd", INTVAL (x)); 3435 break; 3436 3437 case CONST_VECTOR: 3438 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT) 3439 { 3440 gcc_assert (aarch64_const_vec_all_same_int_p (x, 3441 HOST_WIDE_INT_MIN, 3442 HOST_WIDE_INT_MAX)); 3443 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0))); 3444 } 3445 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x))) 3446 { 3447 fputc ('0', f); 3448 } 3449 else 3450 gcc_unreachable (); 3451 break; 3452 3453 case CONST_DOUBLE: 3454 /* CONST_DOUBLE can represent a double-width integer. 3455 In this case, the mode of x is VOIDmode. */ 3456 if (GET_MODE (x) == VOIDmode) 3457 ; /* Do Nothing. */ 3458 else if (aarch64_float_const_zero_rtx_p (x)) 3459 { 3460 fputc ('0', f); 3461 break; 3462 } 3463 else if (aarch64_float_const_representable_p (x)) 3464 { 3465 #define buf_size 20 3466 char float_buf[buf_size] = {'\0'}; 3467 REAL_VALUE_TYPE r; 3468 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 3469 real_to_decimal_for_mode (float_buf, &r, 3470 buf_size, buf_size, 3471 1, GET_MODE (x)); 3472 asm_fprintf (asm_out_file, "%s", float_buf); 3473 break; 3474 #undef buf_size 3475 } 3476 output_operand_lossage ("invalid constant"); 3477 return; 3478 default: 3479 output_operand_lossage ("invalid operand"); 3480 return; 3481 } 3482 break; 3483 3484 case 'A': 3485 if (GET_CODE (x) == HIGH) 3486 x = XEXP (x, 0); 3487 3488 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR)) 3489 { 3490 case SYMBOL_SMALL_GOT: 3491 asm_fprintf (asm_out_file, ":got:"); 3492 break; 3493 3494 case SYMBOL_SMALL_TLSGD: 3495 asm_fprintf (asm_out_file, ":tlsgd:"); 3496 break; 3497 3498 case SYMBOL_SMALL_TLSDESC: 3499 asm_fprintf (asm_out_file, ":tlsdesc:"); 3500 break; 3501 3502 case SYMBOL_SMALL_GOTTPREL: 3503 asm_fprintf (asm_out_file, ":gottprel:"); 3504 break; 3505 3506 case SYMBOL_SMALL_TPREL: 3507 asm_fprintf (asm_out_file, ":tprel:"); 3508 break; 3509 3510 default: 3511 break; 3512 } 3513 output_addr_const (asm_out_file, x); 3514 break; 3515 3516 case 'L': 3517 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR)) 3518 { 3519 case SYMBOL_SMALL_GOT: 3520 asm_fprintf (asm_out_file, ":lo12:"); 3521 break; 3522 3523 case SYMBOL_SMALL_TLSGD: 3524 asm_fprintf (asm_out_file, ":tlsgd_lo12:"); 3525 break; 3526 3527 case SYMBOL_SMALL_TLSDESC: 3528 asm_fprintf (asm_out_file, ":tlsdesc_lo12:"); 3529 break; 3530 3531 case SYMBOL_SMALL_GOTTPREL: 3532 asm_fprintf (asm_out_file, ":gottprel_lo12:"); 
3533 break; 3534 3535 case SYMBOL_SMALL_TPREL: 3536 asm_fprintf (asm_out_file, ":tprel_lo12_nc:"); 3537 break; 3538 3539 default: 3540 break; 3541 } 3542 output_addr_const (asm_out_file, x); 3543 break; 3544 3545 case 'G': 3546 3547 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR)) 3548 { 3549 case SYMBOL_SMALL_TPREL: 3550 asm_fprintf (asm_out_file, ":tprel_hi12:"); 3551 break; 3552 default: 3553 break; 3554 } 3555 output_addr_const (asm_out_file, x); 3556 break; 3557 3558 default: 3559 output_operand_lossage ("invalid operand prefix '%%%c'", code); 3560 return; 3561 } 3562 } 3563 3564 void 3565 aarch64_print_operand_address (FILE *f, rtx x) 3566 { 3567 struct aarch64_address_info addr; 3568 3569 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode, 3570 MEM, true)) 3571 switch (addr.type) 3572 { 3573 case ADDRESS_REG_IMM: 3574 if (addr.offset == const0_rtx) 3575 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]); 3576 else 3577 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)], 3578 INTVAL (addr.offset)); 3579 return; 3580 3581 case ADDRESS_REG_REG: 3582 if (addr.shift == 0) 3583 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)], 3584 reg_names [REGNO (addr.offset)]); 3585 else 3586 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)], 3587 reg_names [REGNO (addr.offset)], addr.shift); 3588 return; 3589 3590 case ADDRESS_REG_UXTW: 3591 if (addr.shift == 0) 3592 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)], 3593 REGNO (addr.offset) - R0_REGNUM); 3594 else 3595 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)], 3596 REGNO (addr.offset) - R0_REGNUM, addr.shift); 3597 return; 3598 3599 case ADDRESS_REG_SXTW: 3600 if (addr.shift == 0) 3601 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)], 3602 REGNO (addr.offset) - R0_REGNUM); 3603 else 3604 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)], 3605 REGNO (addr.offset) - R0_REGNUM, addr.shift); 3606 return; 3607 3608 case ADDRESS_REG_WB: 3609 switch (GET_CODE (x)) 3610 { 3611 case PRE_INC: 3612 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], 3613 GET_MODE_SIZE (aarch64_memory_reference_mode)); 3614 return; 3615 case POST_INC: 3616 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)], 3617 GET_MODE_SIZE (aarch64_memory_reference_mode)); 3618 return; 3619 case PRE_DEC: 3620 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)], 3621 GET_MODE_SIZE (aarch64_memory_reference_mode)); 3622 return; 3623 case POST_DEC: 3624 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)], 3625 GET_MODE_SIZE (aarch64_memory_reference_mode)); 3626 return; 3627 case PRE_MODIFY: 3628 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)], 3629 INTVAL (addr.offset)); 3630 return; 3631 case POST_MODIFY: 3632 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)], 3633 INTVAL (addr.offset)); 3634 return; 3635 default: 3636 break; 3637 } 3638 break; 3639 3640 case ADDRESS_LO_SUM: 3641 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]); 3642 output_addr_const (f, addr.offset); 3643 asm_fprintf (f, "]"); 3644 return; 3645 3646 case ADDRESS_SYMBOLIC: 3647 break; 3648 } 3649 3650 output_addr_const (f, x); 3651 } 3652 3653 void 3654 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED, 3655 int labelno ATTRIBUTE_UNUSED) 3656 { 3657 sorry ("function profiling"); 3658 } 3659 3660 bool 3661 aarch64_label_mentioned_p (rtx x) 3662 { 3663 const char *fmt; 3664 int i; 3665 3666 if (GET_CODE (x) == LABEL_REF) 3667 
return true; 3668 3669 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the 3670 referencing instruction, but they are constant offsets, not 3671 symbols. */ 3672 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) 3673 return false; 3674 3675 fmt = GET_RTX_FORMAT (GET_CODE (x)); 3676 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 3677 { 3678 if (fmt[i] == 'E') 3679 { 3680 int j; 3681 3682 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 3683 if (aarch64_label_mentioned_p (XVECEXP (x, i, j))) 3684 return 1; 3685 } 3686 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i))) 3687 return 1; 3688 } 3689 3690 return 0; 3691 } 3692 3693 /* Implement REGNO_REG_CLASS. */ 3694 3695 enum reg_class 3696 aarch64_regno_regclass (unsigned regno) 3697 { 3698 if (GP_REGNUM_P (regno)) 3699 return CORE_REGS; 3700 3701 if (regno == SP_REGNUM) 3702 return STACK_REG; 3703 3704 if (regno == FRAME_POINTER_REGNUM 3705 || regno == ARG_POINTER_REGNUM) 3706 return CORE_REGS; 3707 3708 if (FP_REGNUM_P (regno)) 3709 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS; 3710 3711 return NO_REGS; 3712 } 3713 3714 /* Try a machine-dependent way of reloading an illegitimate address 3715 operand. If we find one, push the reload and return the new rtx. */ 3716 3717 rtx 3718 aarch64_legitimize_reload_address (rtx *x_p, 3719 enum machine_mode mode, 3720 int opnum, int type, 3721 int ind_levels ATTRIBUTE_UNUSED) 3722 { 3723 rtx x = *x_p; 3724 3725 /* Do not allow mem (plus (reg, const)) if vector mode. */ 3726 if (aarch64_vector_mode_p (mode) 3727 && GET_CODE (x) == PLUS 3728 && REG_P (XEXP (x, 0)) 3729 && CONST_INT_P (XEXP (x, 1))) 3730 { 3731 rtx orig_rtx = x; 3732 x = copy_rtx (x); 3733 push_reload (orig_rtx, NULL_RTX, x_p, NULL, 3734 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, 3735 opnum, (enum reload_type) type); 3736 return x; 3737 } 3738 3739 /* We must recognize output that we have already generated ourselves. */ 3740 if (GET_CODE (x) == PLUS 3741 && GET_CODE (XEXP (x, 0)) == PLUS 3742 && REG_P (XEXP (XEXP (x, 0), 0)) 3743 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 3744 && CONST_INT_P (XEXP (x, 1))) 3745 { 3746 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, 3747 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, 3748 opnum, (enum reload_type) type); 3749 return x; 3750 } 3751 3752 /* We wish to handle large displacements off a base register by splitting 3753 the addend across an add and the mem insn. This can cut the number of 3754 extra insns needed from 3 to 1. It is only useful for load/store of a 3755 single register with 12 bit offset field. */ 3756 if (GET_CODE (x) == PLUS 3757 && REG_P (XEXP (x, 0)) 3758 && CONST_INT_P (XEXP (x, 1)) 3759 && HARD_REGISTER_P (XEXP (x, 0)) 3760 && mode != TImode 3761 && mode != TFmode 3762 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true)) 3763 { 3764 HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); 3765 HOST_WIDE_INT low = val & 0xfff; 3766 HOST_WIDE_INT high = val - low; 3767 HOST_WIDE_INT offs; 3768 rtx cst; 3769 3770 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain 3771 BLKmode alignment. */ 3772 if (GET_MODE_SIZE (mode) == 0) 3773 return NULL_RTX; 3774 3775 offs = low % GET_MODE_SIZE (mode); 3776 3777 /* Align misaligned offset by adjusting high part to compensate. */ 3778 if (offs != 0) 3779 { 3780 if (aarch64_uimm12_shift (high + offs)) 3781 { 3782 /* Align down. */ 3783 low = low - offs; 3784 high = high + offs; 3785 } 3786 else 3787 { 3788 /* Align up. 
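                 (e.g. for an 8-byte access the residual low part is
                 rounded up to the next multiple of 8, and the difference,
                 including any carry into bit 12, is folded back into the
                 high part so that high + low still equals the original
                 offset.)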
*/ 3789 offs = GET_MODE_SIZE (mode) - offs; 3790 low = low + offs; 3791 high = high + (low & 0x1000) - offs; 3792 low &= 0xfff; 3793 } 3794 } 3795 3796 /* Check for overflow. */ 3797 if (high + low != val) 3798 return NULL_RTX; 3799 3800 cst = GEN_INT (high); 3801 if (!aarch64_uimm12_shift (high)) 3802 cst = force_const_mem (Pmode, cst); 3803 3804 /* Reload high part into base reg, leaving the low part 3805 in the mem instruction. */ 3806 x = gen_rtx_PLUS (Pmode, 3807 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst), 3808 GEN_INT (low)); 3809 3810 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, 3811 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, 3812 opnum, (enum reload_type) type); 3813 return x; 3814 } 3815 3816 return NULL_RTX; 3817 } 3818 3819 3820 static reg_class_t 3821 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, 3822 reg_class_t rclass, 3823 enum machine_mode mode, 3824 secondary_reload_info *sri) 3825 { 3826 /* Address expressions of the form PLUS (SP, large_offset) need two 3827 scratch registers, one for the constant, and one for holding a 3828 copy of SP, since SP cannot be used on the RHS of an add-reg 3829 instruction. */ 3830 if (mode == DImode 3831 && GET_CODE (x) == PLUS 3832 && XEXP (x, 0) == stack_pointer_rtx 3833 && CONST_INT_P (XEXP (x, 1)) 3834 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1)))) 3835 { 3836 sri->icode = CODE_FOR_reload_sp_immediate; 3837 return NO_REGS; 3838 } 3839 3840 /* Without the TARGET_SIMD instructions we cannot move a Q register 3841 to a Q register directly. We need a scratch. */ 3842 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x) 3843 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD 3844 && reg_class_subset_p (rclass, FP_REGS)) 3845 { 3846 if (mode == TFmode) 3847 sri->icode = CODE_FOR_aarch64_reload_movtf; 3848 else if (mode == TImode) 3849 sri->icode = CODE_FOR_aarch64_reload_movti; 3850 return NO_REGS; 3851 } 3852 3853 /* A TFmode or TImode memory access should be handled via an FP_REGS 3854 because AArch64 has richer addressing modes for LDR/STR instructions 3855 than LDP/STP instructions. */ 3856 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS 3857 && GET_MODE_SIZE (mode) == 16 && MEM_P (x)) 3858 return FP_REGS; 3859 3860 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x)) 3861 return CORE_REGS; 3862 3863 return NO_REGS; 3864 } 3865 3866 static bool 3867 aarch64_can_eliminate (const int from, const int to) 3868 { 3869 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into 3870 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */ 3871 3872 if (frame_pointer_needed) 3873 { 3874 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 3875 return true; 3876 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 3877 return false; 3878 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM 3879 && !cfun->calls_alloca) 3880 return true; 3881 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 3882 return true; 3883 return false; 3884 } 3885 else 3886 { 3887 /* If we decided that we didn't need a leaf frame pointer but then used 3888 LR in the function, then we'll want a frame pointer after all, so 3889 prevent this elimination to ensure a frame pointer is used. 
3890 3891 NOTE: the original value of flag_omit_frame_pointer gets trashed 3892 IFF flag_omit_leaf_frame_pointer is true, so we check the value 3893 of faked_omit_frame_pointer here (which is true when we always 3894 wish to keep non-leaf frame pointers but only wish to keep leaf frame 3895 pointers when LR is clobbered). */ 3896 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM 3897 && df_regs_ever_live_p (LR_REGNUM) 3898 && faked_omit_frame_pointer) 3899 return false; 3900 } 3901 3902 return true; 3903 } 3904 3905 HOST_WIDE_INT 3906 aarch64_initial_elimination_offset (unsigned from, unsigned to) 3907 { 3908 HOST_WIDE_INT frame_size; 3909 HOST_WIDE_INT offset; 3910 3911 aarch64_layout_frame (); 3912 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size 3913 + crtl->outgoing_args_size 3914 + cfun->machine->saved_varargs_size); 3915 3916 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT); 3917 offset = frame_size; 3918 3919 if (to == HARD_FRAME_POINTER_REGNUM) 3920 { 3921 if (from == ARG_POINTER_REGNUM) 3922 return offset - crtl->outgoing_args_size; 3923 3924 if (from == FRAME_POINTER_REGNUM) 3925 return cfun->machine->frame.saved_regs_size; 3926 } 3927 3928 if (to == STACK_POINTER_REGNUM) 3929 { 3930 if (from == FRAME_POINTER_REGNUM) 3931 { 3932 HOST_WIDE_INT elim = crtl->outgoing_args_size 3933 + cfun->machine->frame.saved_regs_size 3934 - cfun->machine->frame.fp_lr_offset; 3935 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); 3936 return elim; 3937 } 3938 } 3939 3940 return offset; 3941 } 3942 3943 3944 /* Implement RETURN_ADDR_RTX. We do not support moving back to a 3945 previous frame. */ 3946 3947 rtx 3948 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) 3949 { 3950 if (count != 0) 3951 return const0_rtx; 3952 return get_hard_reg_initial_val (Pmode, LR_REGNUM); 3953 } 3954 3955 3956 static void 3957 aarch64_asm_trampoline_template (FILE *f) 3958 { 3959 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]); 3960 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]); 3961 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]); 3962 assemble_aligned_integer (4, const0_rtx); 3963 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); 3964 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); 3965 } 3966 3967 unsigned 3968 aarch64_trampoline_size (void) 3969 { 3970 return 32; /* 3 insns + padding + 2 dwords. */ 3971 } 3972 3973 static void 3974 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) 3975 { 3976 rtx fnaddr, mem, a_tramp; 3977 3978 /* Don't need to copy the trailing D-words, we fill those in below. */ 3979 emit_block_move (m_tramp, assemble_trampoline_template (), 3980 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL); 3981 mem = adjust_address (m_tramp, DImode, 16); 3982 fnaddr = XEXP (DECL_RTL (fndecl), 0); 3983 emit_move_insn (mem, fnaddr); 3984 3985 mem = adjust_address (m_tramp, DImode, 24); 3986 emit_move_insn (mem, chain_value); 3987 3988 /* XXX We should really define a "clear_cache" pattern and use 3989 gen_clear_cache(). 
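     As emitted here, the cache maintenance is a library call equivalent
     to __clear_cache (addr, addr + TRAMPOLINE_SIZE) on the freshly
     written trampoline.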
*/ 3990 a_tramp = XEXP (m_tramp, 0); 3991 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), 3992 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode, 3993 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode); 3994 } 3995 3996 static unsigned char 3997 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode) 3998 { 3999 switch (regclass) 4000 { 4001 case CORE_REGS: 4002 case POINTER_REGS: 4003 case GENERAL_REGS: 4004 case ALL_REGS: 4005 case FP_REGS: 4006 case FP_LO_REGS: 4007 return 4008 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 : 4009 (GET_MODE_SIZE (mode) + 7) / 8; 4010 case STACK_REG: 4011 return 1; 4012 4013 case NO_REGS: 4014 return 0; 4015 4016 default: 4017 break; 4018 } 4019 gcc_unreachable (); 4020 } 4021 4022 static reg_class_t 4023 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass) 4024 { 4025 return ((regclass == POINTER_REGS || regclass == STACK_REG) 4026 ? GENERAL_REGS : regclass); 4027 } 4028 4029 void 4030 aarch64_asm_output_labelref (FILE* f, const char *name) 4031 { 4032 asm_fprintf (f, "%U%s", name); 4033 } 4034 4035 static void 4036 aarch64_elf_asm_constructor (rtx symbol, int priority) 4037 { 4038 if (priority == DEFAULT_INIT_PRIORITY) 4039 default_ctor_section_asm_out_constructor (symbol, priority); 4040 else 4041 { 4042 section *s; 4043 char buf[18]; 4044 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority); 4045 s = get_section (buf, SECTION_WRITE, NULL); 4046 switch_to_section (s); 4047 assemble_align (POINTER_SIZE); 4048 fputs ("\t.dword\t", asm_out_file); 4049 output_addr_const (asm_out_file, symbol); 4050 fputc ('\n', asm_out_file); 4051 } 4052 } 4053 4054 static void 4055 aarch64_elf_asm_destructor (rtx symbol, int priority) 4056 { 4057 if (priority == DEFAULT_INIT_PRIORITY) 4058 default_dtor_section_asm_out_destructor (symbol, priority); 4059 else 4060 { 4061 section *s; 4062 char buf[18]; 4063 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority); 4064 s = get_section (buf, SECTION_WRITE, NULL); 4065 switch_to_section (s); 4066 assemble_align (POINTER_SIZE); 4067 fputs ("\t.dword\t", asm_out_file); 4068 output_addr_const (asm_out_file, symbol); 4069 fputc ('\n', asm_out_file); 4070 } 4071 } 4072 4073 const char* 4074 aarch64_output_casesi (rtx *operands) 4075 { 4076 char buf[100]; 4077 char label[100]; 4078 rtx diff_vec = PATTERN (next_real_insn (operands[2])); 4079 int index; 4080 static const char *const patterns[4][2] = 4081 { 4082 { 4083 "ldrb\t%w3, [%0,%w1,uxtw]", 4084 "add\t%3, %4, %w3, sxtb #2" 4085 }, 4086 { 4087 "ldrh\t%w3, [%0,%w1,uxtw #1]", 4088 "add\t%3, %4, %w3, sxth #2" 4089 }, 4090 { 4091 "ldr\t%w3, [%0,%w1,uxtw #2]", 4092 "add\t%3, %4, %w3, sxtw #2" 4093 }, 4094 /* We assume that DImode is only generated when not optimizing and 4095 that we don't really need 64-bit address offsets. That would 4096 imply an object file with 8GB of code in a single function! */ 4097 { 4098 "ldr\t%w3, [%0,%w1,uxtw #2]", 4099 "add\t%3, %4, %w3, sxtw #2" 4100 } 4101 }; 4102 4103 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); 4104 4105 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec))); 4106 4107 gcc_assert (index >= 0 && index <= 3); 4108 4109 /* Need to implement table size reduction, by changing the code below.
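For reference, the sequence emitted below for, e.g., a byte-sized dispatch table is: "ldrb w3, [x0, w1, uxtw]; adr x4, .Lrtx<N>; add x3, x4, w3, sxtb #2; br x3" (register numbers are illustrative), with the .Lrtx<N> label assembled immediately afterwards as the base from which the table offsets are taken.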
*/ 4110 output_asm_insn (patterns[index][0], operands); 4111 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2])); 4112 snprintf (buf, sizeof (buf), 4113 "adr\t%%4, %s", targetm.strip_name_encoding (label)); 4114 output_asm_insn (buf, operands); 4115 output_asm_insn (patterns[index][1], operands); 4116 output_asm_insn ("br\t%3", operands); 4117 assemble_label (asm_out_file, label); 4118 return ""; 4119 } 4120 4121 4122 /* Return size in bits of an arithmetic operand which is shifted/scaled and 4123 masked such that it is suitable for a UXTB, UXTH, or UXTW extend 4124 operator. */ 4125 4126 int 4127 aarch64_uxt_size (int shift, HOST_WIDE_INT mask) 4128 { 4129 if (shift >= 0 && shift <= 3) 4130 { 4131 int size; 4132 for (size = 8; size <= 32; size *= 2) 4133 { 4134 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1; 4135 if (mask == bits << shift) 4136 return size; 4137 } 4138 } 4139 return 0; 4140 } 4141 4142 static bool 4143 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, 4144 const_rtx x ATTRIBUTE_UNUSED) 4145 { 4146 /* We can't use blocks for constants when we're using a per-function 4147 constant pool. */ 4148 return false; 4149 } 4150 4151 static section * 4152 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED, 4153 rtx x ATTRIBUTE_UNUSED, 4154 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) 4155 { 4156 /* Force all constant pool entries into the current function section. */ 4157 return function_section (current_function_decl); 4158 } 4159 4160 4161 /* Costs. */ 4162 4163 /* Helper function for rtx cost calculation. Strip a shift expression 4164 from X. Returns the inner operand if successful, or the original 4165 expression on failure. */ 4166 static rtx 4167 aarch64_strip_shift (rtx x) 4168 { 4169 rtx op = x; 4170 4171 if ((GET_CODE (op) == ASHIFT 4172 || GET_CODE (op) == ASHIFTRT 4173 || GET_CODE (op) == LSHIFTRT) 4174 && CONST_INT_P (XEXP (op, 1))) 4175 return XEXP (op, 0); 4176 4177 if (GET_CODE (op) == MULT 4178 && CONST_INT_P (XEXP (op, 1)) 4179 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64) 4180 return XEXP (op, 0); 4181 4182 return x; 4183 } 4184 4185 /* Helper function for rtx cost calculation. Strip a shift or extend 4186 expression from X. Returns the inner operand if successful, or the 4187 original expression on failure. We deal with a number of possible 4188 canonicalization variations here. */ 4189 static rtx 4190 aarch64_strip_shift_or_extend (rtx x) 4191 { 4192 rtx op = x; 4193 4194 /* Zero and sign extraction of a widened value. */ 4195 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT) 4196 && XEXP (op, 2) == const0_rtx 4197 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1), 4198 XEXP (op, 1))) 4199 return XEXP (XEXP (op, 0), 0); 4200 4201 /* It can also be represented (for zero-extend) as an AND with an 4202 immediate. */ 4203 if (GET_CODE (op) == AND 4204 && GET_CODE (XEXP (op, 0)) == MULT 4205 && CONST_INT_P (XEXP (XEXP (op, 0), 1)) 4206 && CONST_INT_P (XEXP (op, 1)) 4207 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))), 4208 INTVAL (XEXP (op, 1))) != 0) 4209 return XEXP (XEXP (op, 0), 0); 4210 4211 /* Now handle extended register, as this may also have an optional 4212 left shift by 1..4. 
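For instance, the addend of "add x0, x1, w2, sxtw #3" is represented as (ashift:DI (sign_extend:DI (reg:SI)) (const_int 3)); we want to strip the shift and extension and cost only the inner register.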
*/ 4213 if (GET_CODE (op) == ASHIFT 4214 && CONST_INT_P (XEXP (op, 1)) 4215 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4) 4216 op = XEXP (op, 0); 4217 4218 if (GET_CODE (op) == ZERO_EXTEND 4219 || GET_CODE (op) == SIGN_EXTEND) 4220 op = XEXP (op, 0); 4221 4222 if (op != x) 4223 return op; 4224 4225 return aarch64_strip_shift (x); 4226 } 4227 4228 /* Calculate the cost of calculating X, storing it in *COST. Result 4229 is true if the total cost of the operation has now been calculated. */ 4230 static bool 4231 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, 4232 int param ATTRIBUTE_UNUSED, int *cost, bool speed) 4233 { 4234 rtx op0, op1; 4235 const struct cpu_rtx_cost_table *extra_cost 4236 = aarch64_tune_params->insn_extra_cost; 4237 4238 switch (code) 4239 { 4240 case SET: 4241 op0 = SET_DEST (x); 4242 op1 = SET_SRC (x); 4243 4244 switch (GET_CODE (op0)) 4245 { 4246 case MEM: 4247 if (speed) 4248 *cost += extra_cost->memory_store; 4249 4250 if (op1 != const0_rtx) 4251 *cost += rtx_cost (op1, SET, 1, speed); 4252 return true; 4253 4254 case SUBREG: 4255 if (! REG_P (SUBREG_REG (op0))) 4256 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed); 4257 /* Fall through. */ 4258 case REG: 4259 /* Cost is just the cost of the RHS of the set. */ 4260 *cost += rtx_cost (op1, SET, 1, true); 4261 return true; 4262 4263 case ZERO_EXTRACT: /* Bit-field insertion. */ 4264 case SIGN_EXTRACT: 4265 /* Strip any redundant widening of the RHS to meet the width of 4266 the target. */ 4267 if (GET_CODE (op1) == SUBREG) 4268 op1 = SUBREG_REG (op1); 4269 if ((GET_CODE (op1) == ZERO_EXTEND 4270 || GET_CODE (op1) == SIGN_EXTEND) 4271 && GET_CODE (XEXP (op0, 1)) == CONST_INT 4272 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0))) 4273 >= INTVAL (XEXP (op0, 1)))) 4274 op1 = XEXP (op1, 0); 4275 *cost += rtx_cost (op1, SET, 1, speed); 4276 return true; 4277 4278 default: 4279 break; 4280 } 4281 return false; 4282 4283 case MEM: 4284 if (speed) 4285 *cost += extra_cost->memory_load; 4286 4287 return true; 4288 4289 case NEG: 4290 op0 = CONST0_RTX (GET_MODE (x)); 4291 op1 = XEXP (x, 0); 4292 goto cost_minus; 4293 4294 case COMPARE: 4295 op0 = XEXP (x, 0); 4296 op1 = XEXP (x, 1); 4297 4298 if (op1 == const0_rtx 4299 && GET_CODE (op0) == AND) 4300 { 4301 x = op0; 4302 goto cost_logic; 4303 } 4304 4305 /* Comparisons can work if the order is swapped. 4306 Canonicalization puts the more complex operation first, but 4307 we want it in op1. */ 4308 if (! 
(REG_P (op0) 4309 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) 4310 { 4311 op0 = XEXP (x, 1); 4312 op1 = XEXP (x, 0); 4313 } 4314 goto cost_minus; 4315 4316 case MINUS: 4317 op0 = XEXP (x, 0); 4318 op1 = XEXP (x, 1); 4319 4320 cost_minus: 4321 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT 4322 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC 4323 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) 4324 { 4325 if (op0 != const0_rtx) 4326 *cost += rtx_cost (op0, MINUS, 0, speed); 4327 4328 if (CONST_INT_P (op1)) 4329 { 4330 if (!aarch64_uimm12_shift (INTVAL (op1))) 4331 *cost += rtx_cost (op1, MINUS, 1, speed); 4332 } 4333 else 4334 { 4335 op1 = aarch64_strip_shift_or_extend (op1); 4336 *cost += rtx_cost (op1, MINUS, 1, speed); 4337 } 4338 return true; 4339 } 4340 4341 return false; 4342 4343 case PLUS: 4344 op0 = XEXP (x, 0); 4345 op1 = XEXP (x, 1); 4346 4347 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 4348 { 4349 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) 4350 { 4351 *cost += rtx_cost (op0, PLUS, 0, speed); 4352 } 4353 else 4354 { 4355 rtx new_op0 = aarch64_strip_shift_or_extend (op0); 4356 4357 if (new_op0 == op0 4358 && GET_CODE (op0) == MULT) 4359 { 4360 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND 4361 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND) 4362 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND 4363 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND)) 4364 { 4365 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0, 4366 speed) 4367 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1, 4368 speed) 4369 + rtx_cost (op1, PLUS, 1, speed)); 4370 if (speed) 4371 *cost += extra_cost->int_multiply_extend_add; 4372 return true; 4373 } 4374 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) 4375 + rtx_cost (XEXP (op0, 1), MULT, 1, speed) 4376 + rtx_cost (op1, PLUS, 1, speed)); 4377 4378 if (speed) 4379 *cost += extra_cost->int_multiply_add; 4380 } 4381 4382 *cost += (rtx_cost (new_op0, PLUS, 0, speed) 4383 + rtx_cost (op1, PLUS, 1, speed)); 4384 } 4385 return true; 4386 } 4387 4388 return false; 4389 4390 case IOR: 4391 case XOR: 4392 case AND: 4393 cost_logic: 4394 op0 = XEXP (x, 0); 4395 op1 = XEXP (x, 1); 4396 4397 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 4398 { 4399 if (CONST_INT_P (op1) 4400 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x))) 4401 { 4402 *cost += rtx_cost (op0, AND, 0, speed); 4403 } 4404 else 4405 { 4406 if (GET_CODE (op0) == NOT) 4407 op0 = XEXP (op0, 0); 4408 op0 = aarch64_strip_shift (op0); 4409 *cost += (rtx_cost (op0, AND, 0, speed) 4410 + rtx_cost (op1, AND, 1, speed)); 4411 } 4412 return true; 4413 } 4414 return false; 4415 4416 case ZERO_EXTEND: 4417 if ((GET_MODE (x) == DImode 4418 && GET_MODE (XEXP (x, 0)) == SImode) 4419 || GET_CODE (XEXP (x, 0)) == MEM) 4420 { 4421 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); 4422 return true; 4423 } 4424 return false; 4425 4426 case SIGN_EXTEND: 4427 if (GET_CODE (XEXP (x, 0)) == MEM) 4428 { 4429 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed); 4430 return true; 4431 } 4432 return false; 4433 4434 case ROTATE: 4435 if (!CONST_INT_P (XEXP (x, 1))) 4436 *cost += COSTS_N_INSNS (2); 4437 /* Fall through. */ 4438 case ROTATERT: 4439 case LSHIFTRT: 4440 case ASHIFT: 4441 case ASHIFTRT: 4442 4443 /* Shifting by a register often takes an extra cycle. 
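For example "lsl x0, x1, x2" must read the shift amount from a register, whereas "lsl x0, x1, #3" encodes it in the instruction, hence the extra register_shift cost charged below when the shift amount is not a CONST_INT.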
*/ 4444 if (speed && !CONST_INT_P (XEXP (x, 1))) 4445 *cost += extra_cost->register_shift; 4446 4447 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed); 4448 return true; 4449 4450 case HIGH: 4451 if (!CONSTANT_P (XEXP (x, 0))) 4452 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed); 4453 return true; 4454 4455 case LO_SUM: 4456 if (!CONSTANT_P (XEXP (x, 1))) 4457 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed); 4458 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed); 4459 return true; 4460 4461 case ZERO_EXTRACT: 4462 case SIGN_EXTRACT: 4463 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed); 4464 return true; 4465 4466 case MULT: 4467 op0 = XEXP (x, 0); 4468 op1 = XEXP (x, 1); 4469 4470 *cost = COSTS_N_INSNS (1); 4471 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 4472 { 4473 if (CONST_INT_P (op1) 4474 && exact_log2 (INTVAL (op1)) > 0) 4475 { 4476 *cost += rtx_cost (op0, ASHIFT, 0, speed); 4477 return true; 4478 } 4479 4480 if ((GET_CODE (op0) == ZERO_EXTEND 4481 && GET_CODE (op1) == ZERO_EXTEND) 4482 || (GET_CODE (op0) == SIGN_EXTEND 4483 && GET_CODE (op1) == SIGN_EXTEND)) 4484 { 4485 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) 4486 + rtx_cost (XEXP (op1, 0), MULT, 1, speed)); 4487 if (speed) 4488 *cost += extra_cost->int_multiply_extend; 4489 return true; 4490 } 4491 4492 if (speed) 4493 *cost += extra_cost->int_multiply; 4494 } 4495 else if (speed) 4496 { 4497 if (GET_MODE (x) == DFmode) 4498 *cost += extra_cost->double_multiply; 4499 else if (GET_MODE (x) == SFmode) 4500 *cost += extra_cost->float_multiply; 4501 } 4502 4503 return false; /* All arguments need to be in registers. */ 4504 4505 case MOD: 4506 case UMOD: 4507 *cost = COSTS_N_INSNS (2); 4508 if (speed) 4509 { 4510 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 4511 *cost += (extra_cost->int_multiply_add 4512 + extra_cost->int_divide); 4513 else if (GET_MODE (x) == DFmode) 4514 *cost += (extra_cost->double_multiply 4515 + extra_cost->double_divide); 4516 else if (GET_MODE (x) == SFmode) 4517 *cost += (extra_cost->float_multiply 4518 + extra_cost->float_divide); 4519 } 4520 return false; /* All arguments need to be in registers. */ 4521 4522 case DIV: 4523 case UDIV: 4524 *cost = COSTS_N_INSNS (1); 4525 if (speed) 4526 { 4527 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 4528 *cost += extra_cost->int_divide; 4529 else if (GET_MODE (x) == DFmode) 4530 *cost += extra_cost->double_divide; 4531 else if (GET_MODE (x) == SFmode) 4532 *cost += extra_cost->float_divide; 4533 } 4534 return false; /* All arguments need to be in registers. 
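Returning false here lets the generic rtx_cost code recurse into the operands and add their costs on top of the divide cost accumulated above.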
*/ 4535 4536 default: 4537 break; 4538 } 4539 return false; 4540 } 4541 4542 static int 4543 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED, 4544 enum machine_mode mode ATTRIBUTE_UNUSED, 4545 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) 4546 { 4547 enum rtx_code c = GET_CODE (x); 4548 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; 4549 4550 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) 4551 return addr_cost->pre_modify; 4552 4553 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) 4554 return addr_cost->post_modify; 4555 4556 if (c == PLUS) 4557 { 4558 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 4559 return addr_cost->imm_offset; 4560 else if (GET_CODE (XEXP (x, 0)) == MULT 4561 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND 4562 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) 4563 return addr_cost->register_extend; 4564 4565 return addr_cost->register_offset; 4566 } 4567 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) 4568 return addr_cost->imm_offset; 4569 4570 return 0; 4571 } 4572 4573 static int 4574 aarch64_register_move_cost (enum machine_mode mode, 4575 reg_class_t from_i, reg_class_t to_i) 4576 { 4577 enum reg_class from = (enum reg_class) from_i; 4578 enum reg_class to = (enum reg_class) to_i; 4579 const struct cpu_regmove_cost *regmove_cost 4580 = aarch64_tune_params->regmove_cost; 4581 4582 if (from == GENERAL_REGS && to == GENERAL_REGS) 4583 return regmove_cost->GP2GP; 4584 else if (from == GENERAL_REGS) 4585 return regmove_cost->GP2FP; 4586 else if (to == GENERAL_REGS) 4587 return regmove_cost->FP2GP; 4588 4589 /* When AdvSIMD instructions are disabled it is not possible to move 4590 a 128-bit value directly between Q registers. This is handled in 4591 secondary reload. A general register is used as a scratch to move 4592 the upper DI value and the lower DI value is moved directly, 4593 hence the cost is the sum of three moves. */ 4594 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128) 4595 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP; 4596 4597 return regmove_cost->FP2FP; 4598 } 4599 4600 static int 4601 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, 4602 reg_class_t rclass ATTRIBUTE_UNUSED, 4603 bool in ATTRIBUTE_UNUSED) 4604 { 4605 return aarch64_tune_params->memmov_cost; 4606 } 4607 4608 static void initialize_aarch64_code_model (void); 4609 4610 /* Parse the architecture extension string. */ 4611 4612 static void 4613 aarch64_parse_extension (char *str) 4614 { 4615 /* The extension string is parsed left to right. */ 4616 const struct aarch64_option_extension *opt = NULL; 4617 4618 /* Flag to say whether we are adding or removing an extension. */ 4619 int adding_ext = -1; 4620 4621 while (str != NULL && *str != 0) 4622 { 4623 char *ext; 4624 size_t len; 4625 4626 str++; 4627 ext = strchr (str, '+'); 4628 4629 if (ext != NULL) 4630 len = ext - str; 4631 else 4632 len = strlen (str); 4633 4634 if (len >= 2 && strncmp (str, "no", 2) == 0) 4635 { 4636 adding_ext = 0; 4637 len -= 2; 4638 str += 2; 4639 } 4640 else if (len > 0) 4641 adding_ext = 1; 4642 4643 if (len == 0) 4644 { 4645 error ("missing feature modifier after %qs", "+no"); 4646 return; 4647 } 4648 4649 /* Scan over the extensions table trying to find an exact match. */ 4650 for (opt = all_extensions; opt->name != NULL; opt++) 4651 { 4652 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0) 4653 { 4654 /* Add or remove the extension. 
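For example "-march=armv8-a+fp+nosimd" is processed left to right: "+fp" ORs in the FP flag bits and the subsequent "+nosimd" clears the SIMD bits again.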
*/ 4655 if (adding_ext) 4656 aarch64_isa_flags |= opt->flags_on; 4657 else 4658 aarch64_isa_flags &= ~(opt->flags_off); 4659 break; 4660 } 4661 } 4662 4663 if (opt->name == NULL) 4664 { 4665 /* Extension not found in list. */ 4666 error ("unknown feature modifier %qs", str); 4667 return; 4668 } 4669 4670 str = ext; 4671 }; 4672 4673 return; 4674 } 4675 4676 /* Parse the ARCH string. */ 4677 4678 static void 4679 aarch64_parse_arch (void) 4680 { 4681 char *ext; 4682 const struct processor *arch; 4683 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1); 4684 size_t len; 4685 4686 strcpy (str, aarch64_arch_string); 4687 4688 ext = strchr (str, '+'); 4689 4690 if (ext != NULL) 4691 len = ext - str; 4692 else 4693 len = strlen (str); 4694 4695 if (len == 0) 4696 { 4697 error ("missing arch name in -march=%qs", str); 4698 return; 4699 } 4700 4701 /* Loop through the list of supported ARCHs to find a match. */ 4702 for (arch = all_architectures; arch->name != NULL; arch++) 4703 { 4704 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0) 4705 { 4706 selected_arch = arch; 4707 aarch64_isa_flags = selected_arch->flags; 4708 selected_cpu = &all_cores[selected_arch->core]; 4709 4710 if (ext != NULL) 4711 { 4712 /* ARCH string contains at least one extension. */ 4713 aarch64_parse_extension (ext); 4714 } 4715 4716 return; 4717 } 4718 } 4719 4720 /* ARCH name not found in list. */ 4721 error ("unknown value %qs for -march", str); 4722 return; 4723 } 4724 4725 /* Parse the CPU string. */ 4726 4727 static void 4728 aarch64_parse_cpu (void) 4729 { 4730 char *ext; 4731 const struct processor *cpu; 4732 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1); 4733 size_t len; 4734 4735 strcpy (str, aarch64_cpu_string); 4736 4737 ext = strchr (str, '+'); 4738 4739 if (ext != NULL) 4740 len = ext - str; 4741 else 4742 len = strlen (str); 4743 4744 if (len == 0) 4745 { 4746 error ("missing cpu name in -mcpu=%qs", str); 4747 return; 4748 } 4749 4750 /* Loop through the list of supported CPUs to find a match. */ 4751 for (cpu = all_cores; cpu->name != NULL; cpu++) 4752 { 4753 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0) 4754 { 4755 selected_cpu = cpu; 4756 aarch64_isa_flags = selected_cpu->flags; 4757 4758 if (ext != NULL) 4759 { 4760 /* CPU string contains at least one extension. */ 4761 aarch64_parse_extension (ext); 4762 } 4763 4764 return; 4765 } 4766 } 4767 4768 /* CPU name not found in list. */ 4769 error ("unknown value %qs for -mcpu", str); 4770 return; 4771 } 4772 4773 /* Parse the TUNE string. */ 4774 4775 static void 4776 aarch64_parse_tune (void) 4777 { 4778 const struct processor *cpu; 4779 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1); 4780 strcpy (str, aarch64_tune_string); 4781 4782 /* Loop through the list of supported CPUs to find a match. */ 4783 for (cpu = all_cores; cpu->name != NULL; cpu++) 4784 { 4785 if (strcmp (cpu->name, str) == 0) 4786 { 4787 selected_tune = cpu; 4788 return; 4789 } 4790 } 4791 4792 /* CPU name not found in list. */ 4793 error ("unknown value %qs for -mtune", str); 4794 return; 4795 } 4796 4797 4798 /* Implement TARGET_OPTION_OVERRIDE. */ 4799 4800 static void 4801 aarch64_override_options (void) 4802 { 4803 /* march wins over mcpu, so when march is defined, mcpu takes the same value, 4804 otherwise march remains undefined. mtune can be used with either march or 4805 mcpu. 
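For example, given "-march=armv8-a -mcpu=cortex-a53" the -mcpu value is simply discarded below, whereas an explicit -mtune=cortex-a53 would still select the Cortex-A53 tuning parameters.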
*/ 4806 4807 if (aarch64_arch_string) 4808 { 4809 aarch64_parse_arch (); 4810 aarch64_cpu_string = NULL; 4811 } 4812 4813 if (aarch64_cpu_string) 4814 { 4815 aarch64_parse_cpu (); 4816 selected_arch = NULL; 4817 } 4818 4819 if (aarch64_tune_string) 4820 { 4821 aarch64_parse_tune (); 4822 } 4823 4824 initialize_aarch64_code_model (); 4825 4826 aarch64_build_bitmask_table (); 4827 4828 /* This target defaults to strict volatile bitfields. */ 4829 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2)) 4830 flag_strict_volatile_bitfields = 1; 4831 4832 /* If the user did not specify a processor, choose the default 4833 one for them. This will be the CPU set during configuration using 4834 --with-cpu, otherwise it is "generic". */ 4835 if (!selected_cpu) 4836 { 4837 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f]; 4838 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6; 4839 } 4840 4841 gcc_assert (selected_cpu); 4842 4843 /* The selected cpu may be an architecture, so lookup tuning by core ID. */ 4844 if (!selected_tune) 4845 selected_tune = &all_cores[selected_cpu->core]; 4846 4847 aarch64_tune_flags = selected_tune->flags; 4848 aarch64_tune = selected_tune->core; 4849 aarch64_tune_params = selected_tune->tune; 4850 4851 if (aarch64_fix_a53_err835769 == 2) 4852 { 4853 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT 4854 aarch64_fix_a53_err835769 = 1; 4855 #else 4856 aarch64_fix_a53_err835769 = 0; 4857 #endif 4858 } 4859 4860 aarch64_override_options_after_change (); 4861 } 4862 4863 /* Implement targetm.override_options_after_change. */ 4864 4865 static void 4866 aarch64_override_options_after_change (void) 4867 { 4868 faked_omit_frame_pointer = false; 4869 4870 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so 4871 that aarch64_frame_pointer_required will be called. We need to remember 4872 whether flag_omit_frame_pointer was turned on normally or just faked. */ 4873 4874 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer) 4875 { 4876 flag_omit_frame_pointer = true; 4877 faked_omit_frame_pointer = true; 4878 } 4879 } 4880 4881 static struct machine_function * 4882 aarch64_init_machine_status (void) 4883 { 4884 struct machine_function *machine; 4885 machine = ggc_alloc_cleared_machine_function (); 4886 return machine; 4887 } 4888 4889 void 4890 aarch64_init_expanders (void) 4891 { 4892 init_machine_status = aarch64_init_machine_status; 4893 } 4894 4895 /* A checking mechanism for the implementation of the various code models. */ 4896 static void 4897 initialize_aarch64_code_model (void) 4898 { 4899 if (flag_pic) 4900 { 4901 switch (aarch64_cmodel_var) 4902 { 4903 case AARCH64_CMODEL_TINY: 4904 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC; 4905 break; 4906 case AARCH64_CMODEL_SMALL: 4907 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC; 4908 break; 4909 case AARCH64_CMODEL_LARGE: 4910 sorry ("code model %qs with -f%s", "large", 4911 flag_pic > 1 ? "PIC" : "pic"); 4912 default: 4913 gcc_unreachable (); 4914 } 4915 } 4916 else 4917 aarch64_cmodel = aarch64_cmodel_var; 4918 } 4919 4920 /* Return true if SYMBOL_REF X binds locally. */ 4921 4922 static bool 4923 aarch64_symbol_binds_local_p (const_rtx x) 4924 { 4925 return (SYMBOL_REF_DECL (x) 4926 ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) 4927 : SYMBOL_REF_LOCAL_P (x)); 4928 } 4929 4930 /* Return true if SYMBOL_REF X is thread local */ 4931 static bool 4932 aarch64_tls_symbol_p (rtx x) 4933 { 4934 if (! 
TARGET_HAVE_TLS) 4935 return false; 4936 4937 if (GET_CODE (x) != SYMBOL_REF) 4938 return false; 4939 4940 return SYMBOL_REF_TLS_MODEL (x) != 0; 4941 } 4942 4943 /* Classify a TLS symbol into one of the TLS kinds. */ 4944 enum aarch64_symbol_type 4945 aarch64_classify_tls_symbol (rtx x) 4946 { 4947 enum tls_model tls_kind = tls_symbolic_operand_type (x); 4948 4949 switch (tls_kind) 4950 { 4951 case TLS_MODEL_GLOBAL_DYNAMIC: 4952 case TLS_MODEL_LOCAL_DYNAMIC: 4953 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD; 4954 4955 case TLS_MODEL_INITIAL_EXEC: 4956 return SYMBOL_SMALL_GOTTPREL; 4957 4958 case TLS_MODEL_LOCAL_EXEC: 4959 return SYMBOL_SMALL_TPREL; 4960 4961 case TLS_MODEL_EMULATED: 4962 case TLS_MODEL_NONE: 4963 return SYMBOL_FORCE_TO_MEM; 4964 4965 default: 4966 gcc_unreachable (); 4967 } 4968 } 4969 4970 /* Return the method that should be used to access SYMBOL_REF or 4971 LABEL_REF X in context CONTEXT. */ 4972 enum aarch64_symbol_type 4973 aarch64_classify_symbol (rtx x, 4974 enum aarch64_symbol_context context ATTRIBUTE_UNUSED) 4975 { 4976 if (GET_CODE (x) == LABEL_REF) 4977 { 4978 switch (aarch64_cmodel) 4979 { 4980 case AARCH64_CMODEL_LARGE: 4981 return SYMBOL_FORCE_TO_MEM; 4982 4983 case AARCH64_CMODEL_TINY_PIC: 4984 case AARCH64_CMODEL_TINY: 4985 case AARCH64_CMODEL_SMALL_PIC: 4986 case AARCH64_CMODEL_SMALL: 4987 return SYMBOL_SMALL_ABSOLUTE; 4988 4989 default: 4990 gcc_unreachable (); 4991 } 4992 } 4993 4994 gcc_assert (GET_CODE (x) == SYMBOL_REF); 4995 4996 switch (aarch64_cmodel) 4997 { 4998 case AARCH64_CMODEL_LARGE: 4999 return SYMBOL_FORCE_TO_MEM; 5000 5001 case AARCH64_CMODEL_TINY: 5002 case AARCH64_CMODEL_SMALL: 5003 5004 /* This is needed to get DFmode, TImode constants to be loaded off 5005 the constant pool. Is it necessary to dump TImode values into 5006 the constant pool. We don't handle TImode constant loads properly 5007 yet and hence need to use the constant pool. */ 5008 if (CONSTANT_POOL_ADDRESS_P (x)) 5009 return SYMBOL_FORCE_TO_MEM; 5010 5011 if (aarch64_tls_symbol_p (x)) 5012 return aarch64_classify_tls_symbol (x); 5013 5014 if (SYMBOL_REF_WEAK (x)) 5015 return SYMBOL_FORCE_TO_MEM; 5016 5017 return SYMBOL_SMALL_ABSOLUTE; 5018 5019 case AARCH64_CMODEL_TINY_PIC: 5020 case AARCH64_CMODEL_SMALL_PIC: 5021 5022 if (CONSTANT_POOL_ADDRESS_P (x)) 5023 return SYMBOL_FORCE_TO_MEM; 5024 5025 if (aarch64_tls_symbol_p (x)) 5026 return aarch64_classify_tls_symbol (x); 5027 5028 if (!aarch64_symbol_binds_local_p (x)) 5029 return SYMBOL_SMALL_GOT; 5030 5031 return SYMBOL_SMALL_ABSOLUTE; 5032 5033 default: 5034 gcc_unreachable (); 5035 } 5036 /* By default push everything into the constant pool. */ 5037 return SYMBOL_FORCE_TO_MEM; 5038 } 5039 5040 /* Return true if X is a symbolic constant that can be used in context 5041 CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */ 5042 5043 bool 5044 aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context, 5045 enum aarch64_symbol_type *symbol_type) 5046 { 5047 rtx offset; 5048 split_const (x, &x, &offset); 5049 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF) 5050 *symbol_type = aarch64_classify_symbol (x, context); 5051 else 5052 return false; 5053 5054 /* No checking of offset at this point. 
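For a constant of the form (const (plus (symbol_ref "foo") (const_int 12))) only the symbol is classified; the (const_int 12) returned by split_const is ignored here.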
*/ 5055 return true; 5056 } 5057 5058 bool 5059 aarch64_constant_address_p (rtx x) 5060 { 5061 return (CONSTANT_P (x) && memory_address_p (DImode, x)); 5062 } 5063 5064 bool 5065 aarch64_legitimate_pic_operand_p (rtx x) 5066 { 5067 if (GET_CODE (x) == SYMBOL_REF 5068 || (GET_CODE (x) == CONST 5069 && GET_CODE (XEXP (x, 0)) == PLUS 5070 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) 5071 return false; 5072 5073 return true; 5074 } 5075 5076 /* Return true if X holds either a quarter-precision or 5077 floating-point +0.0 constant. */ 5078 static bool 5079 aarch64_valid_floating_const (enum machine_mode mode, rtx x) 5080 { 5081 if (!CONST_DOUBLE_P (x)) 5082 return false; 5083 5084 /* TODO: We could handle moving 0.0 to a TFmode register, 5085 but first we would like to refactor the movtf_aarch64 5086 to be more amicable to split moves properly and 5087 correctly gate on TARGET_SIMD. For now - reject all 5088 constants which are not to SFmode or DFmode registers. */ 5089 if (!(mode == SFmode || mode == DFmode)) 5090 return false; 5091 5092 if (aarch64_float_const_zero_rtx_p (x)) 5093 return true; 5094 return aarch64_float_const_representable_p (x); 5095 } 5096 5097 static bool 5098 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x) 5099 { 5100 /* Do not allow vector struct mode constants. We could support 5101 0 and -1 easily, but they need support in aarch64-simd.md. */ 5102 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode)) 5103 return false; 5104 5105 /* This could probably go away because 5106 we now decompose CONST_INTs according to expand_mov_immediate. */ 5107 if ((GET_CODE (x) == CONST_VECTOR 5108 && aarch64_simd_valid_immediate (x, mode, false, 5109 NULL, NULL, NULL, NULL, NULL) != -1) 5110 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x)) 5111 return !targetm.cannot_force_const_mem (mode, x); 5112 5113 if (GET_CODE (x) == HIGH 5114 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) 5115 return true; 5116 5117 return aarch64_constant_address_p (x); 5118 } 5119 5120 rtx 5121 aarch64_load_tp (rtx target) 5122 { 5123 if (!target 5124 || GET_MODE (target) != Pmode 5125 || !register_operand (target, Pmode)) 5126 target = gen_reg_rtx (Pmode); 5127 5128 /* Can return in any reg. */ 5129 emit_insn (gen_aarch64_load_tp_hard (target)); 5130 return target; 5131 } 5132 5133 /* On AAPCS systems, this is the "struct __va_list". */ 5134 static GTY(()) tree va_list_type; 5135 5136 /* Implement TARGET_BUILD_BUILTIN_VA_LIST. 5137 Return the type to use as __builtin_va_list. 5138 5139 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as: 5140 5141 struct __va_list 5142 { 5143 void *__stack; 5144 void *__gr_top; 5145 void *__vr_top; 5146 int __gr_offs; 5147 int __vr_offs; 5148 }; */ 5149 5150 static tree 5151 aarch64_build_builtin_va_list (void) 5152 { 5153 tree va_list_name; 5154 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; 5155 5156 /* Create the type. */ 5157 va_list_type = lang_hooks.types.make_type (RECORD_TYPE); 5158 /* Give it the required name. */ 5159 va_list_name = build_decl (BUILTINS_LOCATION, 5160 TYPE_DECL, 5161 get_identifier ("__va_list"), 5162 va_list_type); 5163 DECL_ARTIFICIAL (va_list_name) = 1; 5164 TYPE_NAME (va_list_type) = va_list_name; 5165 TYPE_STUB_DECL (va_list_type) = va_list_name; 5166 5167 /* Create the fields. 
*/ 5168 f_stack = build_decl (BUILTINS_LOCATION, 5169 FIELD_DECL, get_identifier ("__stack"), 5170 ptr_type_node); 5171 f_grtop = build_decl (BUILTINS_LOCATION, 5172 FIELD_DECL, get_identifier ("__gr_top"), 5173 ptr_type_node); 5174 f_vrtop = build_decl (BUILTINS_LOCATION, 5175 FIELD_DECL, get_identifier ("__vr_top"), 5176 ptr_type_node); 5177 f_groff = build_decl (BUILTINS_LOCATION, 5178 FIELD_DECL, get_identifier ("__gr_offs"), 5179 integer_type_node); 5180 f_vroff = build_decl (BUILTINS_LOCATION, 5181 FIELD_DECL, get_identifier ("__vr_offs"), 5182 integer_type_node); 5183 5184 DECL_ARTIFICIAL (f_stack) = 1; 5185 DECL_ARTIFICIAL (f_grtop) = 1; 5186 DECL_ARTIFICIAL (f_vrtop) = 1; 5187 DECL_ARTIFICIAL (f_groff) = 1; 5188 DECL_ARTIFICIAL (f_vroff) = 1; 5189 5190 DECL_FIELD_CONTEXT (f_stack) = va_list_type; 5191 DECL_FIELD_CONTEXT (f_grtop) = va_list_type; 5192 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type; 5193 DECL_FIELD_CONTEXT (f_groff) = va_list_type; 5194 DECL_FIELD_CONTEXT (f_vroff) = va_list_type; 5195 5196 TYPE_FIELDS (va_list_type) = f_stack; 5197 DECL_CHAIN (f_stack) = f_grtop; 5198 DECL_CHAIN (f_grtop) = f_vrtop; 5199 DECL_CHAIN (f_vrtop) = f_groff; 5200 DECL_CHAIN (f_groff) = f_vroff; 5201 5202 /* Compute its layout. */ 5203 layout_type (va_list_type); 5204 5205 return va_list_type; 5206 } 5207 5208 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */ 5209 static void 5210 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) 5211 { 5212 const CUMULATIVE_ARGS *cum; 5213 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; 5214 tree stack, grtop, vrtop, groff, vroff; 5215 tree t; 5216 int gr_save_area_size; 5217 int vr_save_area_size; 5218 int vr_offset; 5219 5220 cum = &crtl->args.info; 5221 gr_save_area_size 5222 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD; 5223 vr_save_area_size 5224 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG; 5225 5226 if (TARGET_GENERAL_REGS_ONLY) 5227 { 5228 if (cum->aapcs_nvrn > 0) 5229 sorry ("%qs and floating point or vector arguments", 5230 "-mgeneral-regs-only"); 5231 vr_save_area_size = 0; 5232 } 5233 5234 f_stack = TYPE_FIELDS (va_list_type_node); 5235 f_grtop = DECL_CHAIN (f_stack); 5236 f_vrtop = DECL_CHAIN (f_grtop); 5237 f_groff = DECL_CHAIN (f_vrtop); 5238 f_vroff = DECL_CHAIN (f_groff); 5239 5240 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack, 5241 NULL_TREE); 5242 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop, 5243 NULL_TREE); 5244 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop, 5245 NULL_TREE); 5246 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff, 5247 NULL_TREE); 5248 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff, 5249 NULL_TREE); 5250 5251 /* Emit code to initialize STACK, which points to the next varargs stack 5252 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used 5253 by named arguments. STACK is 8-byte aligned. */ 5254 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx); 5255 if (cum->aapcs_stack_size > 0) 5256 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD); 5257 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t); 5258 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 5259 5260 /* Emit code to initialize GRTOP, the top of the GR save area. 5261 virtual_incoming_args_rtx should have been 16 byte aligned. 
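The GR save area written by aarch64_setup_incoming_varargs occupies the bytes immediately below this address, with the VR save area below that, so __gr_top is simply virtual_incoming_args_rtx itself.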
*/ 5262 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx); 5263 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t); 5264 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 5265 5266 /* Emit code to initialize VRTOP, the top of the VR save area. 5267 This address is gr_save_area_bytes below GRTOP, rounded 5268 down to the next 16-byte boundary. */ 5269 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx); 5270 vr_offset = AARCH64_ROUND_UP (gr_save_area_size, 5271 STACK_BOUNDARY / BITS_PER_UNIT); 5272 5273 if (vr_offset) 5274 t = fold_build_pointer_plus_hwi (t, -vr_offset); 5275 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t); 5276 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 5277 5278 /* Emit code to initialize GROFF, the offset from GRTOP of the 5279 next GPR argument. */ 5280 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff, 5281 build_int_cst (TREE_TYPE (groff), -gr_save_area_size)); 5282 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 5283 5284 /* Likewise emit code to initialize VROFF, the offset from FTOP 5285 of the next VR argument. */ 5286 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff, 5287 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size)); 5288 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 5289 } 5290 5291 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */ 5292 5293 static tree 5294 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, 5295 gimple_seq *post_p ATTRIBUTE_UNUSED) 5296 { 5297 tree addr; 5298 bool indirect_p; 5299 bool is_ha; /* is HFA or HVA. */ 5300 bool dw_align; /* double-word align. */ 5301 enum machine_mode ag_mode = VOIDmode; 5302 int nregs; 5303 enum machine_mode mode; 5304 5305 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; 5306 tree stack, f_top, f_off, off, arg, roundup, on_stack; 5307 HOST_WIDE_INT size, rsize, adjust, align; 5308 tree t, u, cond1, cond2; 5309 5310 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); 5311 if (indirect_p) 5312 type = build_pointer_type (type); 5313 5314 mode = TYPE_MODE (type); 5315 5316 f_stack = TYPE_FIELDS (va_list_type_node); 5317 f_grtop = DECL_CHAIN (f_stack); 5318 f_vrtop = DECL_CHAIN (f_grtop); 5319 f_groff = DECL_CHAIN (f_vrtop); 5320 f_vroff = DECL_CHAIN (f_groff); 5321 5322 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist), 5323 f_stack, NULL_TREE); 5324 size = int_size_in_bytes (type); 5325 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT; 5326 5327 dw_align = false; 5328 adjust = 0; 5329 if (aarch64_vfp_is_call_or_return_candidate (mode, 5330 type, 5331 &ag_mode, 5332 &nregs, 5333 &is_ha)) 5334 { 5335 /* TYPE passed in fp/simd registers. */ 5336 if (TARGET_GENERAL_REGS_ONLY) 5337 sorry ("%qs and floating point or vector arguments", 5338 "-mgeneral-regs-only"); 5339 5340 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), 5341 unshare_expr (valist), f_vrtop, NULL_TREE); 5342 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), 5343 unshare_expr (valist), f_vroff, NULL_TREE); 5344 5345 rsize = nregs * UNITS_PER_VREG; 5346 5347 if (is_ha) 5348 { 5349 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG) 5350 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode); 5351 } 5352 else if (BLOCK_REG_PADDING (mode, type, 1) == downward 5353 && size < UNITS_PER_VREG) 5354 { 5355 adjust = UNITS_PER_VREG - size; 5356 } 5357 } 5358 else 5359 { 5360 /* TYPE passed in general registers. 
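The slot size is rounded up to a multiple of UNITS_PER_WORD, so e.g. a 12-byte structure gets rsize == 16 and consumes two GPR slots; a type aligned to more than 8 bytes additionally forces __gr_offs up to a 16-byte boundary via dw_align.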
*/ 5361 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), 5362 unshare_expr (valist), f_grtop, NULL_TREE); 5363 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff), 5364 unshare_expr (valist), f_groff, NULL_TREE); 5365 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; 5366 nregs = rsize / UNITS_PER_WORD; 5367 5368 if (align > 8) 5369 dw_align = true; 5370 5371 if (BLOCK_REG_PADDING (mode, type, 1) == downward 5372 && size < UNITS_PER_WORD) 5373 { 5374 adjust = UNITS_PER_WORD - size; 5375 } 5376 } 5377 5378 /* Get a local temporary for the field value. */ 5379 off = get_initialized_tmp_var (f_off, pre_p, NULL); 5380 5381 /* Emit code to branch if off >= 0. */ 5382 t = build2 (GE_EXPR, boolean_type_node, off, 5383 build_int_cst (TREE_TYPE (off), 0)); 5384 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE); 5385 5386 if (dw_align) 5387 { 5388 /* Emit: offs = (offs + 15) & -16. */ 5389 t = build2 (PLUS_EXPR, TREE_TYPE (off), off, 5390 build_int_cst (TREE_TYPE (off), 15)); 5391 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t, 5392 build_int_cst (TREE_TYPE (off), -16)); 5393 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t); 5394 } 5395 else 5396 roundup = NULL; 5397 5398 /* Update ap.__[g|v]r_offs */ 5399 t = build2 (PLUS_EXPR, TREE_TYPE (off), off, 5400 build_int_cst (TREE_TYPE (off), rsize)); 5401 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t); 5402 5403 /* String up. */ 5404 if (roundup) 5405 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); 5406 5407 /* [cond2] if (ap.__[g|v]r_offs > 0) */ 5408 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off), 5409 build_int_cst (TREE_TYPE (f_off), 0)); 5410 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE); 5411 5412 /* String up: make sure the assignment happens before the use. */ 5413 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2); 5414 COND_EXPR_ELSE (cond1) = t; 5415 5416 /* Prepare the trees handling the argument that is passed on the stack; 5417 the top level node will store in ON_STACK. */ 5418 arg = get_initialized_tmp_var (stack, pre_p, NULL); 5419 if (align > 8) 5420 { 5421 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */ 5422 t = fold_convert (intDI_type_node, arg); 5423 t = build2 (PLUS_EXPR, TREE_TYPE (t), t, 5424 build_int_cst (TREE_TYPE (t), 15)); 5425 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, 5426 build_int_cst (TREE_TYPE (t), -16)); 5427 t = fold_convert (TREE_TYPE (arg), t); 5428 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t); 5429 } 5430 else 5431 roundup = NULL; 5432 /* Advance ap.__stack */ 5433 t = fold_convert (intDI_type_node, arg); 5434 t = build2 (PLUS_EXPR, TREE_TYPE (t), t, 5435 build_int_cst (TREE_TYPE (t), size + 7)); 5436 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, 5437 build_int_cst (TREE_TYPE (t), -8)); 5438 t = fold_convert (TREE_TYPE (arg), t); 5439 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t); 5440 /* String up roundup and advance. */ 5441 if (roundup) 5442 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); 5443 /* String up with arg */ 5444 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg); 5445 /* Big-endianness related address adjustment. 
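With downward padding (the big-endian case for arguments smaller than UNITS_PER_WORD) the value sits at the high-address end of its 8-byte slot, so the address is stepped forward by (UNITS_PER_WORD - size) bytes before it is dereferenced.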
*/ 5446 if (BLOCK_REG_PADDING (mode, type, 1) == downward 5447 && size < UNITS_PER_WORD) 5448 { 5449 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg, 5450 size_int (UNITS_PER_WORD - size)); 5451 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t); 5452 } 5453 5454 COND_EXPR_THEN (cond1) = unshare_expr (on_stack); 5455 COND_EXPR_THEN (cond2) = unshare_expr (on_stack); 5456 5457 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */ 5458 t = off; 5459 if (adjust) 5460 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off, 5461 build_int_cst (TREE_TYPE (off), adjust)); 5462 5463 t = fold_convert (sizetype, t); 5464 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t); 5465 5466 if (is_ha) 5467 { 5468 /* type ha; // treat as "struct {ftype field[n];}" 5469 ... [computing offs] 5470 for (i = 0; i <nregs; ++i, offs += 16) 5471 ha.field[i] = *((ftype *)(ap.__vr_top + offs)); 5472 return ha; */ 5473 int i; 5474 tree tmp_ha, field_t, field_ptr_t; 5475 5476 /* Declare a local variable. */ 5477 tmp_ha = create_tmp_var_raw (type, "ha"); 5478 gimple_add_tmp_var (tmp_ha); 5479 5480 /* Establish the base type. */ 5481 switch (ag_mode) 5482 { 5483 case SFmode: 5484 field_t = float_type_node; 5485 field_ptr_t = float_ptr_type_node; 5486 break; 5487 case DFmode: 5488 field_t = double_type_node; 5489 field_ptr_t = double_ptr_type_node; 5490 break; 5491 case TFmode: 5492 field_t = long_double_type_node; 5493 field_ptr_t = long_double_ptr_type_node; 5494 break; 5495 /* The half precision and quad precision are not fully supported yet. Enable 5496 the following code after the support is complete. Need to find the correct 5497 type node for __fp16 *. */ 5498 #if 0 5499 case HFmode: 5500 field_t = float_type_node; 5501 field_ptr_t = float_ptr_type_node; 5502 break; 5503 #endif 5504 case V2SImode: 5505 case V4SImode: 5506 { 5507 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode)); 5508 field_t = build_vector_type_for_mode (innertype, ag_mode); 5509 field_ptr_t = build_pointer_type (field_t); 5510 } 5511 break; 5512 default: 5513 gcc_assert (0); 5514 } 5515 5516 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */ 5517 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha); 5518 addr = t; 5519 t = fold_convert (field_ptr_t, addr); 5520 t = build2 (MODIFY_EXPR, field_t, 5521 build1 (INDIRECT_REF, field_t, tmp_ha), 5522 build1 (INDIRECT_REF, field_t, t)); 5523 5524 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */ 5525 for (i = 1; i < nregs; ++i) 5526 { 5527 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG); 5528 u = fold_convert (field_ptr_t, addr); 5529 u = build2 (MODIFY_EXPR, field_t, 5530 build2 (MEM_REF, field_t, tmp_ha, 5531 build_int_cst (field_ptr_t, 5532 (i * 5533 int_size_in_bytes (field_t)))), 5534 build1 (INDIRECT_REF, field_t, u)); 5535 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u); 5536 } 5537 5538 u = fold_convert (TREE_TYPE (f_top), tmp_ha); 5539 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u); 5540 } 5541 5542 COND_EXPR_ELSE (cond2) = t; 5543 addr = fold_convert (build_pointer_type (type), cond1); 5544 addr = build_va_arg_indirect_ref (addr); 5545 5546 if (indirect_p) 5547 addr = build_va_arg_indirect_ref (addr); 5548 5549 return addr; 5550 } 5551 5552 /* Implement TARGET_SETUP_INCOMING_VARARGS. 
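This spills the remaining candidate argument registers of a variadic function into the GR and VR save areas just below virtual_incoming_args_rtx, so that va_arg can later fetch them via the offsets recorded in the va_list.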
*/ 5553 5554 static void 5555 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode, 5556 tree type, int *pretend_size ATTRIBUTE_UNUSED, 5557 int no_rtl) 5558 { 5559 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 5560 CUMULATIVE_ARGS local_cum; 5561 int gr_saved, vr_saved; 5562 5563 /* The caller has advanced CUM up to, but not beyond, the last named 5564 argument. Advance a local copy of CUM past the last "real" named 5565 argument, to find out how many registers are left over. */ 5566 local_cum = *cum; 5567 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true); 5568 5569 /* Find out how many registers we need to save. */ 5570 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn; 5571 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn; 5572 5573 if (TARGET_GENERAL_REGS_ONLY) 5574 { 5575 if (local_cum.aapcs_nvrn > 0) 5576 sorry ("%qs and floating point or vector arguments", 5577 "-mgeneral-regs-only"); 5578 vr_saved = 0; 5579 } 5580 5581 if (!no_rtl) 5582 { 5583 if (gr_saved > 0) 5584 { 5585 rtx ptr, mem; 5586 5587 /* virtual_incoming_args_rtx should have been 16-byte aligned. */ 5588 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, 5589 - gr_saved * UNITS_PER_WORD); 5590 mem = gen_frame_mem (BLKmode, ptr); 5591 set_mem_alias_set (mem, get_varargs_alias_set ()); 5592 5593 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM, 5594 mem, gr_saved); 5595 } 5596 if (vr_saved > 0) 5597 { 5598 /* We can't use move_block_from_reg, because it will use 5599 the wrong mode, storing D regs only. */ 5600 enum machine_mode mode = TImode; 5601 int off, i; 5602 5603 /* Set OFF to the offset from virtual_incoming_args_rtx of 5604 the first vector register. The VR save area lies below 5605 the GR one, and is aligned to 16 bytes. */ 5606 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD, 5607 STACK_BOUNDARY / BITS_PER_UNIT); 5608 off -= vr_saved * UNITS_PER_VREG; 5609 5610 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i) 5611 { 5612 rtx ptr, mem; 5613 5614 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off); 5615 mem = gen_frame_mem (mode, ptr); 5616 set_mem_alias_set (mem, get_varargs_alias_set ()); 5617 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i)); 5618 off += UNITS_PER_VREG; 5619 } 5620 } 5621 } 5622 5623 /* We don't save the size into *PRETEND_SIZE because we want to avoid 5624 any complication of having crtl->args.pretend_args_size changed. */ 5625 cfun->machine->saved_varargs_size 5626 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD, 5627 STACK_BOUNDARY / BITS_PER_UNIT) 5628 + vr_saved * UNITS_PER_VREG); 5629 } 5630 5631 static void 5632 aarch64_conditional_register_usage (void) 5633 { 5634 int i; 5635 if (!TARGET_FLOAT) 5636 { 5637 for (i = V0_REGNUM; i <= V31_REGNUM; i++) 5638 { 5639 fixed_regs[i] = 1; 5640 call_used_regs[i] = 1; 5641 } 5642 } 5643 } 5644 5645 /* Walk down the type tree of TYPE counting consecutive base elements. 5646 If *MODEP is VOIDmode, then set it to the first valid floating point 5647 type. If a non-floating point type is found, or if a floating point 5648 type that doesn't match a non-VOIDmode *MODEP is found, then return -1, 5649 otherwise return the count in the sub-tree.
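For example struct { float x, y, z; } yields *MODEP == SFmode and a count of 3, whereas struct { float x; double y; } returns -1 because the element modes differ.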
*/ 5650 static int 5651 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) 5652 { 5653 enum machine_mode mode; 5654 HOST_WIDE_INT size; 5655 5656 switch (TREE_CODE (type)) 5657 { 5658 case REAL_TYPE: 5659 mode = TYPE_MODE (type); 5660 if (mode != DFmode && mode != SFmode && mode != TFmode) 5661 return -1; 5662 5663 if (*modep == VOIDmode) 5664 *modep = mode; 5665 5666 if (*modep == mode) 5667 return 1; 5668 5669 break; 5670 5671 case COMPLEX_TYPE: 5672 mode = TYPE_MODE (TREE_TYPE (type)); 5673 if (mode != DFmode && mode != SFmode && mode != TFmode) 5674 return -1; 5675 5676 if (*modep == VOIDmode) 5677 *modep = mode; 5678 5679 if (*modep == mode) 5680 return 2; 5681 5682 break; 5683 5684 case VECTOR_TYPE: 5685 /* Use V2SImode and V4SImode as representatives of all 64-bit 5686 and 128-bit vector types. */ 5687 size = int_size_in_bytes (type); 5688 switch (size) 5689 { 5690 case 8: 5691 mode = V2SImode; 5692 break; 5693 case 16: 5694 mode = V4SImode; 5695 break; 5696 default: 5697 return -1; 5698 } 5699 5700 if (*modep == VOIDmode) 5701 *modep = mode; 5702 5703 /* Vector modes are considered to be opaque: two vectors are 5704 equivalent for the purposes of being homogeneous aggregates 5705 if they are the same size. */ 5706 if (*modep == mode) 5707 return 1; 5708 5709 break; 5710 5711 case ARRAY_TYPE: 5712 { 5713 int count; 5714 tree index = TYPE_DOMAIN (type); 5715 5716 /* Can't handle incomplete types. */ 5717 if (!COMPLETE_TYPE_P (type)) 5718 return -1; 5719 5720 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); 5721 if (count == -1 5722 || !index 5723 || !TYPE_MAX_VALUE (index) 5724 || !host_integerp (TYPE_MAX_VALUE (index), 1) 5725 || !TYPE_MIN_VALUE (index) 5726 || !host_integerp (TYPE_MIN_VALUE (index), 1) 5727 || count < 0) 5728 return -1; 5729 5730 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1) 5731 - tree_low_cst (TYPE_MIN_VALUE (index), 1)); 5732 5733 /* There must be no padding. */ 5734 if (!host_integerp (TYPE_SIZE (type), 1) 5735 || (tree_low_cst (TYPE_SIZE (type), 1) 5736 != count * GET_MODE_BITSIZE (*modep))) 5737 return -1; 5738 5739 return count; 5740 } 5741 5742 case RECORD_TYPE: 5743 { 5744 int count = 0; 5745 int sub_count; 5746 tree field; 5747 5748 /* Can't handle incomplete types. */ 5749 if (!COMPLETE_TYPE_P (type)) 5750 return -1; 5751 5752 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 5753 { 5754 if (TREE_CODE (field) != FIELD_DECL) 5755 continue; 5756 5757 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); 5758 if (sub_count < 0) 5759 return -1; 5760 count += sub_count; 5761 } 5762 5763 /* There must be no padding. */ 5764 if (!host_integerp (TYPE_SIZE (type), 1) 5765 || (tree_low_cst (TYPE_SIZE (type), 1) 5766 != count * GET_MODE_BITSIZE (*modep))) 5767 return -1; 5768 5769 return count; 5770 } 5771 5772 case UNION_TYPE: 5773 case QUAL_UNION_TYPE: 5774 { 5775 /* These aren't very interesting except in a degenerate case. */ 5776 int count = 0; 5777 int sub_count; 5778 tree field; 5779 5780 /* Can't handle incomplete types. */ 5781 if (!COMPLETE_TYPE_P (type)) 5782 return -1; 5783 5784 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 5785 { 5786 if (TREE_CODE (field) != FIELD_DECL) 5787 continue; 5788 5789 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); 5790 if (sub_count < 0) 5791 return -1; 5792 count = count > sub_count ? count : sub_count; 5793 } 5794 5795 /* There must be no padding. 
*/ 5796 if (!host_integerp (TYPE_SIZE (type), 1) 5797 || (tree_low_cst (TYPE_SIZE (type), 1) 5798 != count * GET_MODE_BITSIZE (*modep))) 5799 return -1; 5800 5801 return count; 5802 } 5803 5804 default: 5805 break; 5806 } 5807 5808 return -1; 5809 } 5810 5811 /* Return TRUE if the type, as described by TYPE and MODE, is a composite 5812 type as described in AAPCS64 \S 4.3. This includes aggregate, union and 5813 array types. The C99 floating-point complex types are also considered 5814 as composite types, according to AAPCS64 \S 7.1.1. The complex integer 5815 types, which are GCC extensions and out of the scope of AAPCS64, are 5816 treated as composite types here as well. 5817 5818 Note that MODE itself is not sufficient in determining whether a type 5819 is such a composite type or not. This is because 5820 stor-layout.c:compute_record_mode may have already changed the MODE 5821 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a 5822 structure with only one field may have its MODE set to the mode of the 5823 field. Also an integer mode whose size matches the size of the 5824 RECORD_TYPE type may be used to substitute the original mode 5825 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be 5826 solely relied on. */ 5827 5828 static bool 5829 aarch64_composite_type_p (const_tree type, 5830 enum machine_mode mode) 5831 { 5832 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)) 5833 return true; 5834 5835 if (mode == BLKmode 5836 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT 5837 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) 5838 return true; 5839 5840 return false; 5841 } 5842 5843 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector 5844 type as described in AAPCS64 \S 4.1.2. 5845 5846 See the comment above aarch64_composite_type_p for the notes on MODE. */ 5847 5848 static bool 5849 aarch64_short_vector_p (const_tree type, 5850 enum machine_mode mode) 5851 { 5852 HOST_WIDE_INT size = -1; 5853 5854 if (type && TREE_CODE (type) == VECTOR_TYPE) 5855 size = int_size_in_bytes (type); 5856 else if (!aarch64_composite_type_p (type, mode) 5857 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT 5858 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)) 5859 size = GET_MODE_SIZE (mode); 5860 5861 return (size == 8 || size == 16) ? true : false; 5862 } 5863 5864 /* Return TRUE if an argument, whose type is described by TYPE and MODE, 5865 shall be passed or returned in simd/fp register(s) (providing these 5866 parameter passing registers are available). 5867 5868 Upon successful return, *COUNT returns the number of needed registers, 5869 *BASE_MODE returns the mode of the individual register and when IS_HA 5870 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous 5871 floating-point aggregate or a homogeneous short-vector aggregate.
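For example a structure of four floats is a candidate with *COUNT == 4 and *BASE_MODE == SFmode, and _Complex double is a candidate with *COUNT == 2 and *BASE_MODE == DFmode, while a structure of five floats exceeds HA_MAX_NUM_FLDS and is rejected.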
*/ 5872 5873 static bool 5874 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode, 5875 const_tree type, 5876 enum machine_mode *base_mode, 5877 int *count, 5878 bool *is_ha) 5879 { 5880 enum machine_mode new_mode = VOIDmode; 5881 bool composite_p = aarch64_composite_type_p (type, mode); 5882 5883 if (is_ha != NULL) *is_ha = false; 5884 5885 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT) 5886 || aarch64_short_vector_p (type, mode)) 5887 { 5888 *count = 1; 5889 new_mode = mode; 5890 } 5891 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) 5892 { 5893 if (is_ha != NULL) *is_ha = true; 5894 *count = 2; 5895 new_mode = GET_MODE_INNER (mode); 5896 } 5897 else if (type && composite_p) 5898 { 5899 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); 5900 5901 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS) 5902 { 5903 if (is_ha != NULL) *is_ha = true; 5904 *count = ag_count; 5905 } 5906 else 5907 return false; 5908 } 5909 else 5910 return false; 5911 5912 *base_mode = new_mode; 5913 return true; 5914 } 5915 5916 /* Implement TARGET_STRUCT_VALUE_RTX. */ 5917 5918 static rtx 5919 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED, 5920 int incoming ATTRIBUTE_UNUSED) 5921 { 5922 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM); 5923 } 5924 5925 /* Implements target hook vector_mode_supported_p. */ 5926 static bool 5927 aarch64_vector_mode_supported_p (enum machine_mode mode) 5928 { 5929 if (TARGET_SIMD 5930 && (mode == V4SImode || mode == V8HImode 5931 || mode == V16QImode || mode == V2DImode 5932 || mode == V2SImode || mode == V4HImode 5933 || mode == V8QImode || mode == V2SFmode 5934 || mode == V4SFmode || mode == V2DFmode)) 5935 return true; 5936 5937 return false; 5938 } 5939 5940 /* Return quad mode as the preferred SIMD mode. */ 5941 static enum machine_mode 5942 aarch64_preferred_simd_mode (enum machine_mode mode) 5943 { 5944 if (TARGET_SIMD) 5945 switch (mode) 5946 { 5947 case DFmode: 5948 return V2DFmode; 5949 case SFmode: 5950 return V4SFmode; 5951 case SImode: 5952 return V4SImode; 5953 case HImode: 5954 return V8HImode; 5955 case QImode: 5956 return V16QImode; 5957 case DImode: 5958 return V2DImode; 5959 break; 5960 5961 default:; 5962 } 5963 return word_mode; 5964 } 5965 5966 /* Return the bitmask of possible vector sizes for the vectorizer 5967 to iterate over. */ 5968 static unsigned int 5969 aarch64_autovectorize_vector_sizes (void) 5970 { 5971 return (16 | 8); 5972 } 5973 5974 /* A table to help perform AArch64-specific name mangling for AdvSIMD 5975 vector types in order to conform to the AAPCS64 (see "Procedure 5976 Call Standard for the ARM 64-bit Architecture", Appendix A). To 5977 qualify for emission with the mangled names defined in that document, 5978 a vector type must not only be of the correct mode but also be 5979 composed of AdvSIMD vector element types (e.g. 5980 _builtin_aarch64_simd_qi); these types are registered by 5981 aarch64_init_simd_builtins (). In other words, vector types defined 5982 in other ways e.g. via vector_size attribute will get default 5983 mangled names. */ 5984 typedef struct 5985 { 5986 enum machine_mode mode; 5987 const char *element_type_name; 5988 const char *mangled_name; 5989 } aarch64_simd_mangle_map_entry; 5990 5991 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = { 5992 /* 64-bit containerized types. 
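(An int8x8_t parameter, for instance, is mangled via the first entry below, so a C++ function f (int8x8_t) becomes _Z1f10__Int8x8_t.)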
*/ 5993 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" }, 5994 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" }, 5995 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" }, 5996 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" }, 5997 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" }, 5998 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" }, 5999 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" }, 6000 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" }, 6001 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" }, 6002 /* 128-bit containerized types. */ 6003 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" }, 6004 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" }, 6005 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" }, 6006 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" }, 6007 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" }, 6008 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" }, 6009 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" }, 6010 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" }, 6011 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" }, 6012 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" }, 6013 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" }, 6014 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" }, 6015 { VOIDmode, NULL, NULL } 6016 }; 6017 6018 /* Implement TARGET_MANGLE_TYPE. */ 6019 6020 static const char * 6021 aarch64_mangle_type (const_tree type) 6022 { 6023 /* The AArch64 ABI documents say that "__va_list" has to be 6024 mangled as if it were in the "std" namespace. */ 6025 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) 6026 return "St9__va_list"; 6027 6028 /* Check the mode of the vector type, and the name of the vector 6029 element type, against the table. */ 6030 if (TREE_CODE (type) == VECTOR_TYPE) 6031 { 6032 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map; 6033 6034 while (pos->mode != VOIDmode) 6035 { 6036 tree elt_type = TREE_TYPE (type); 6037 6038 if (pos->mode == TYPE_MODE (type) 6039 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL 6040 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))), 6041 pos->element_type_name)) 6042 return pos->mangled_name; 6043 6044 pos++; 6045 } 6046 } 6047 6048 /* Use the default mangling. */ 6049 return NULL; 6050 } 6051 6052 6053 /* Return true iff X is a MEM rtx. */ 6054 6055 static int 6056 is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED) 6057 { 6058 return MEM_P (*x); 6059 } 6060 6061 6062 /* Return true if mem_insn contains a MEM RTX somewhere in it. */ 6063 6064 static bool 6065 has_memory_op (rtx mem_insn) 6066 { 6067 rtx pattern = PATTERN (mem_insn); 6068 return for_each_rtx (&pattern, is_mem_p, NULL); 6069 } 6070 6071 6072 /* Find the first rtx before insn that will generate an assembly 6073 instruction. */ 6074 6075 static rtx 6076 aarch64_prev_real_insn (rtx insn) 6077 { 6078 if (!insn) 6079 return NULL; 6080 6081 do 6082 { 6083 insn = prev_real_insn (insn); 6084 } 6085 while (insn && recog_memoized (insn) < 0); 6086 6087 return insn; 6088 } 6089 6090 /* Return true iff t1 is the v8type of a multiply-accumulate instruction.
*/ 6091 6092 static bool 6093 is_madd_op (enum attr_v8type t1) 6094 { 6095 return t1 == V8TYPE_MADD 6096 || t1 == V8TYPE_MADDL; 6097 } 6098 6099 6100 /* Check if there is a register dependency between a load and the insn 6101 for which we hold recog_data. */ 6102 6103 static bool 6104 dep_between_memop_and_curr (rtx memop) 6105 { 6106 rtx load_reg; 6107 int opno; 6108 6109 gcc_assert (GET_CODE (memop) == SET); 6110 6111 if (!REG_P (SET_DEST (memop))) 6112 return false; 6113 6114 load_reg = SET_DEST (memop); 6115 for (opno = 1; opno < recog_data.n_operands; opno++) 6116 { 6117 rtx operand = recog_data.operand[opno]; 6118 if (REG_P (operand) 6119 && reg_overlap_mentioned_p (load_reg, operand)) 6120 return true; 6121 6122 } 6123 return false; 6124 } 6125 6126 6127 6128 /* When working around the Cortex-A53 erratum 835769, 6129 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate 6130 instruction and has a preceding memory instruction such that a NOP 6131 should be inserted between them. */ 6132 6133 bool 6134 aarch64_madd_needs_nop (rtx insn) 6135 { 6136 enum attr_v8type attr_type; 6137 rtx prev; 6138 rtx body; 6139 6140 if (!aarch64_fix_a53_err835769) 6141 return false; 6142 6143 if (recog_memoized (insn) < 0) 6144 return false; 6145 6146 attr_type = get_attr_v8type (insn); 6147 if (!is_madd_op (attr_type)) 6148 return false; 6149 6150 prev = aarch64_prev_real_insn (insn); 6151 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN. 6152 Restore recog state to INSN to avoid state corruption. */ 6153 extract_constrain_insn_cached (insn); 6154 6155 if (!prev || !has_memory_op (prev)) 6156 return false; 6157 6158 body = single_set (prev); 6159 6160 /* If the previous insn is a memory op and there is no dependency between 6161 it and the madd, emit a nop between them. If we know it's a memop but 6162 body is NULL, return true to be safe. */ 6163 if (GET_MODE (recog_data.operand[0]) == DImode 6164 && (!body || !dep_between_memop_and_curr (body))) 6165 return true; 6166 6167 return false; 6168 6169 } 6170 6171 /* Implement FINAL_PRESCAN_INSN. */ 6172 6173 void 6174 aarch64_final_prescan_insn (rtx insn) 6175 { 6176 if (aarch64_madd_needs_nop (insn)) 6177 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n"); 6178 } 6179 6180 6181 /* Return the equivalent letter for size. */ 6182 static unsigned char 6183 sizetochar (int size) 6184 { 6185 switch (size) 6186 { 6187 case 64: return 'd'; 6188 case 32: return 's'; 6189 case 16: return 'h'; 6190 case 8 : return 'b'; 6191 default: gcc_unreachable (); 6192 } 6193 } 6194 6195 /* Return true iff x is a uniform vector of floating-point 6196 constants, and the constant can be represented in 6197 quarter-precision form. Note, as aarch64_float_const_representable 6198 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. 
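For example, a V2SF constant { 1.0, 1.0 } is accepted (1.0 is
(16/16) * 2^0 in the quarter-precision form described at
aarch64_float_const_representable_p), { 1.0, 2.0 } is rejected for not
being uniform, and { 0.1, 0.1 } is rejected because 0.1 has no exact
quarter-precision encoding.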
*/ 6199 static bool 6200 aarch64_vect_float_const_representable_p (rtx x) 6201 { 6202 int i = 0; 6203 REAL_VALUE_TYPE r0, ri; 6204 rtx x0, xi; 6205 6206 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT) 6207 return false; 6208 6209 x0 = CONST_VECTOR_ELT (x, 0); 6210 if (!CONST_DOUBLE_P (x0)) 6211 return false; 6212 6213 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0); 6214 6215 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++) 6216 { 6217 xi = CONST_VECTOR_ELT (x, i); 6218 if (!CONST_DOUBLE_P (xi)) 6219 return false; 6220 6221 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi); 6222 if (!REAL_VALUES_EQUAL (r0, ri)) 6223 return false; 6224 } 6225 6226 return aarch64_float_const_representable_p (x0); 6227 } 6228 6229 /* TODO: This function returns values similar to those 6230 returned by neon_valid_immediate in gcc/config/arm/arm.c 6231 but the API here is different enough that these magic numbers 6232 are not used. It should be sufficient to return true or false. */ 6233 static int 6234 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, 6235 rtx *modconst, int *elementwidth, 6236 unsigned char *elementchar, 6237 int *mvn, int *shift) 6238 { 6239 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \ 6240 matches = 1; \ 6241 for (i = 0; i < idx; i += (STRIDE)) \ 6242 if (!(TEST)) \ 6243 matches = 0; \ 6244 if (matches) \ 6245 { \ 6246 immtype = (CLASS); \ 6247 elsize = (ELSIZE); \ 6248 elchar = sizetochar (elsize); \ 6249 eshift = (SHIFT); \ 6250 emvn = (NEG); \ 6251 break; \ 6252 } 6253 6254 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op); 6255 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); 6256 unsigned char bytes[16]; 6257 unsigned char elchar = 0; 6258 int immtype = -1, matches; 6259 unsigned int invmask = inverse ? 0xff : 0; 6260 int eshift, emvn; 6261 6262 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) 6263 { 6264 bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode); 6265 int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0))); 6266 6267 if (!(simd_imm_zero 6268 || aarch64_vect_float_const_representable_p (op))) 6269 return -1; 6270 6271 if (modconst) 6272 *modconst = CONST_VECTOR_ELT (op, 0); 6273 6274 if (elementwidth) 6275 *elementwidth = elem_width; 6276 6277 if (elementchar) 6278 *elementchar = sizetochar (elem_width); 6279 6280 if (shift) 6281 *shift = 0; 6282 6283 if (simd_imm_zero) 6284 return 19; 6285 else 6286 return 18; 6287 } 6288 6289 /* Splat vector constant out into a byte vector. */ 6290 for (i = 0; i < n_elts; i++) 6291 { 6292 rtx el = CONST_VECTOR_ELT (op, i); 6293 unsigned HOST_WIDE_INT elpart; 6294 unsigned int part, parts; 6295 6296 if (GET_CODE (el) == CONST_INT) 6297 { 6298 elpart = INTVAL (el); 6299 parts = 1; 6300 } 6301 else if (GET_CODE (el) == CONST_DOUBLE) 6302 { 6303 elpart = CONST_DOUBLE_LOW (el); 6304 parts = 2; 6305 } 6306 else 6307 gcc_unreachable (); 6308 6309 for (part = 0; part < parts; part++) 6310 { 6311 unsigned int byte; 6312 for (byte = 0; byte < innersize; byte++) 6313 { 6314 bytes[idx++] = (elpart & 0xff) ^ invmask; 6315 elpart >>= BITS_PER_UNIT; 6316 } 6317 if (GET_CODE (el) == CONST_DOUBLE) 6318 elpart = CONST_DOUBLE_HIGH (el); 6319 } 6320 } 6321 6322 /* Sanity check. 
*/ 6323 gcc_assert (idx == GET_MODE_SIZE (mode)); 6324 6325 do 6326 { 6327 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 6328 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0); 6329 6330 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] 6331 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); 6332 6333 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 6334 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); 6335 6336 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 6337 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0); 6338 6339 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0); 6340 6341 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0); 6342 6343 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff 6344 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1); 6345 6346 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] 6347 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); 6348 6349 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff 6350 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); 6351 6352 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff 6353 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1); 6354 6355 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1); 6356 6357 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1); 6358 6359 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] 6360 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0); 6361 6362 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] 6363 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1); 6364 6365 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff 6366 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0); 6367 6368 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 6369 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1); 6370 6371 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0); 6372 6373 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) 6374 && bytes[i] == bytes[(i + 8) % idx], 0, 0); 6375 } 6376 while (0); 6377 6378 /* TODO: Currently the assembler cannot handle types 12 to 15. 6379 And there is no way to specify cmode through the compiler. 6380 Disable them till there is support in the assembler. */ 6381 if (immtype == -1 6382 || (immtype >= 12 && immtype <= 15) 6383 || immtype == 18) 6384 return -1; 6385 6386 6387 if (elementwidth) 6388 *elementwidth = elsize; 6389 6390 if (elementchar) 6391 *elementchar = elchar; 6392 6393 if (mvn) 6394 *mvn = emvn; 6395 6396 if (shift) 6397 *shift = eshift; 6398 6399 if (modconst) 6400 { 6401 unsigned HOST_WIDE_INT imm = 0; 6402 6403 /* Un-invert bytes of recognized vector, if necessary. */ 6404 if (invmask != 0) 6405 for (i = 0; i < idx; i++) 6406 bytes[i] ^= invmask; 6407 6408 if (immtype == 17) 6409 { 6410 /* FIXME: Broken on 32-bit H_W_I hosts. */ 6411 gcc_assert (sizeof (HOST_WIDE_INT) == 8); 6412 6413 for (i = 0; i < 8; i++) 6414 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) 6415 << (i * BITS_PER_UNIT); 6416 6417 *modconst = GEN_INT (imm); 6418 } 6419 else 6420 { 6421 unsigned HOST_WIDE_INT imm = 0; 6422 6423 for (i = 0; i < elsize / BITS_PER_UNIT; i++) 6424 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); 6425 6426 /* Construct 'abcdefgh' because the assembler cannot handle 6427 generic constants. 
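As a worked example (illustrative values): a V4SImode constant with every
element 0x00001f00 splats to the byte pattern 00 1f 00 00 ..., matches
immtype 1 above with elsize 32, shift 8 and mvn 0, and the value stored in
*modconst is 0x1f, which is later emitted as something like
"movi v0.4s, 0x1f, lsl 8".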
*/ 6428 gcc_assert (shift != NULL && mvn != NULL); 6429 if (*mvn) 6430 imm = ~imm; 6431 imm = (imm >> *shift) & 0xff; 6432 *modconst = GEN_INT (imm); 6433 } 6434 } 6435 6436 return immtype; 6437 #undef CHECK 6438 } 6439 6440 /* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction 6441 (or, implicitly, MVNI) immediate. Write back width per element 6442 to *ELEMENTWIDTH, and a modified constant (whatever should be output 6443 for a MOVI instruction) in *MODCONST. */ 6444 int 6445 aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode, 6446 rtx *modconst, int *elementwidth, 6447 unsigned char *elementchar, 6448 int *mvn, int *shift) 6449 { 6450 rtx tmpconst; 6451 int tmpwidth; 6452 unsigned char tmpwidthc; 6453 int tmpmvn = 0, tmpshift = 0; 6454 int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst, 6455 &tmpwidth, &tmpwidthc, 6456 &tmpmvn, &tmpshift); 6457 6458 if (retval == -1) 6459 return 0; 6460 6461 if (modconst) 6462 *modconst = tmpconst; 6463 6464 if (elementwidth) 6465 *elementwidth = tmpwidth; 6466 6467 if (elementchar) 6468 *elementchar = tmpwidthc; 6469 6470 if (mvn) 6471 *mvn = tmpmvn; 6472 6473 if (shift) 6474 *shift = tmpshift; 6475 6476 return 1; 6477 } 6478 6479 static bool 6480 aarch64_const_vec_all_same_int_p (rtx x, 6481 HOST_WIDE_INT minval, 6482 HOST_WIDE_INT maxval) 6483 { 6484 HOST_WIDE_INT firstval; 6485 int count, i; 6486 6487 if (GET_CODE (x) != CONST_VECTOR 6488 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT) 6489 return false; 6490 6491 firstval = INTVAL (CONST_VECTOR_ELT (x, 0)); 6492 if (firstval < minval || firstval > maxval) 6493 return false; 6494 6495 count = CONST_VECTOR_NUNITS (x); 6496 for (i = 1; i < count; i++) 6497 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval) 6498 return false; 6499 6500 return true; 6501 } 6502 6503 /* Check of immediate shift constants are within range. */ 6504 bool 6505 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left) 6506 { 6507 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; 6508 if (left) 6509 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1); 6510 else 6511 return aarch64_const_vec_all_same_int_p (x, 1, bit_width); 6512 } 6513 6514 /* Return true if X is a uniform vector where all elements 6515 are either the floating-point constant 0.0 or the 6516 integer constant 0. */ 6517 bool 6518 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode) 6519 { 6520 return x == CONST0_RTX (mode); 6521 } 6522 6523 bool 6524 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED) 6525 { 6526 HOST_WIDE_INT imm = INTVAL (x); 6527 int i; 6528 6529 for (i = 0; i < 8; i++) 6530 { 6531 unsigned int byte = imm & 0xff; 6532 if (byte != 0xff && byte != 0) 6533 return false; 6534 imm >>= 8; 6535 } 6536 6537 return true; 6538 } 6539 6540 /* Return a const_int vector of VAL. */ 6541 rtx 6542 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val) 6543 { 6544 int nunits = GET_MODE_NUNITS (mode); 6545 rtvec v = rtvec_alloc (nunits); 6546 int i; 6547 6548 for (i=0; i < nunits; i++) 6549 RTVEC_ELT (v, i) = GEN_INT (val); 6550 6551 return gen_rtx_CONST_VECTOR (mode, v); 6552 } 6553 6554 /* Construct and return a PARALLEL RTX vector. */ 6555 rtx 6556 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high) 6557 { 6558 int nunits = GET_MODE_NUNITS (mode); 6559 rtvec v = rtvec_alloc (nunits / 2); 6560 int base = high ? 
nunits / 2 : 0; 6561 rtx t1; 6562 int i; 6563 6564 for (i=0; i < nunits / 2; i++) 6565 RTVEC_ELT (v, i) = GEN_INT (base + i); 6566 6567 t1 = gen_rtx_PARALLEL (mode, v); 6568 return t1; 6569 } 6570 6571 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and 6572 HIGH (exclusive). */ 6573 void 6574 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) 6575 { 6576 HOST_WIDE_INT lane; 6577 gcc_assert (GET_CODE (operand) == CONST_INT); 6578 lane = INTVAL (operand); 6579 6580 if (lane < low || lane >= high) 6581 error ("lane out of range"); 6582 } 6583 6584 void 6585 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) 6586 { 6587 gcc_assert (GET_CODE (operand) == CONST_INT); 6588 HOST_WIDE_INT lane = INTVAL (operand); 6589 6590 if (lane < low || lane >= high) 6591 error ("constant out of range"); 6592 } 6593 6594 /* Emit code to reinterpret one AdvSIMD type as another, 6595 without altering bits. */ 6596 void 6597 aarch64_simd_reinterpret (rtx dest, rtx src) 6598 { 6599 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src)); 6600 } 6601 6602 /* Emit code to place a AdvSIMD pair result in memory locations (with equal 6603 registers). */ 6604 void 6605 aarch64_simd_emit_pair_result_insn (enum machine_mode mode, 6606 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr, 6607 rtx op1) 6608 { 6609 rtx mem = gen_rtx_MEM (mode, destaddr); 6610 rtx tmp1 = gen_reg_rtx (mode); 6611 rtx tmp2 = gen_reg_rtx (mode); 6612 6613 emit_insn (intfn (tmp1, op1, tmp2)); 6614 6615 emit_move_insn (mem, tmp1); 6616 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); 6617 emit_move_insn (mem, tmp2); 6618 } 6619 6620 /* Return TRUE if OP is a valid vector addressing mode. */ 6621 bool 6622 aarch64_simd_mem_operand_p (rtx op) 6623 { 6624 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC 6625 || GET_CODE (XEXP (op, 0)) == REG); 6626 } 6627 6628 /* Set up OPERANDS for a register copy from SRC to DEST, taking care 6629 not to early-clobber SRC registers in the process. 6630 6631 We assume that the operands described by SRC and DEST represent a 6632 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the 6633 number of components into which the copy has been decomposed. */ 6634 void 6635 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest, 6636 rtx *src, unsigned int count) 6637 { 6638 unsigned int i; 6639 6640 if (!reg_overlap_mentioned_p (operands[0], operands[1]) 6641 || REGNO (operands[0]) < REGNO (operands[1])) 6642 { 6643 for (i = 0; i < count; i++) 6644 { 6645 operands[2 * i] = dest[i]; 6646 operands[2 * i + 1] = src[i]; 6647 } 6648 } 6649 else 6650 { 6651 for (i = 0; i < count; i++) 6652 { 6653 operands[2 * i] = dest[count - i - 1]; 6654 operands[2 * i + 1] = src[count - i - 1]; 6655 } 6656 } 6657 } 6658 6659 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is 6660 one of VSTRUCT modes: OI, CI or XI. */ 6661 int 6662 aarch64_simd_attr_length_move (rtx insn) 6663 { 6664 enum machine_mode mode; 6665 6666 extract_insn_cached (insn); 6667 6668 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1])) 6669 { 6670 mode = GET_MODE (recog_data.operand[0]); 6671 switch (mode) 6672 { 6673 case OImode: 6674 return 8; 6675 case CImode: 6676 return 12; 6677 case XImode: 6678 return 16; 6679 default: 6680 gcc_unreachable (); 6681 } 6682 } 6683 return 4; 6684 } 6685 6686 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum 6687 alignment of a vector to 128 bits. 
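In practice this means a 64-bit vector such as int8x8_t keeps its natural
64-bit alignment, while 128-bit and wider vector_size types are all capped
at 128-bit alignment.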
*/ 6688 static HOST_WIDE_INT 6689 aarch64_simd_vector_alignment (const_tree type) 6690 { 6691 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0); 6692 return MIN (align, 128); 6693 } 6694 6695 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */ 6696 static bool 6697 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed) 6698 { 6699 if (is_packed) 6700 return false; 6701 6702 /* We guarantee alignment for vectors up to 128-bits. */ 6703 if (tree_int_cst_compare (TYPE_SIZE (type), 6704 bitsize_int (BIGGEST_ALIGNMENT)) > 0) 6705 return false; 6706 6707 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */ 6708 return true; 6709 } 6710 6711 /* If VALS is a vector constant that can be loaded into a register 6712 using DUP, generate instructions to do so and return an RTX to 6713 assign to the register. Otherwise return NULL_RTX. */ 6714 static rtx 6715 aarch64_simd_dup_constant (rtx vals) 6716 { 6717 enum machine_mode mode = GET_MODE (vals); 6718 enum machine_mode inner_mode = GET_MODE_INNER (mode); 6719 int n_elts = GET_MODE_NUNITS (mode); 6720 bool all_same = true; 6721 rtx x; 6722 int i; 6723 6724 if (GET_CODE (vals) != CONST_VECTOR) 6725 return NULL_RTX; 6726 6727 for (i = 1; i < n_elts; ++i) 6728 { 6729 x = CONST_VECTOR_ELT (vals, i); 6730 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0))) 6731 all_same = false; 6732 } 6733 6734 if (!all_same) 6735 return NULL_RTX; 6736 6737 /* We can load this constant by using DUP and a constant in a 6738 single ARM register. This will be cheaper than a vector 6739 load. */ 6740 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0)); 6741 return gen_rtx_VEC_DUPLICATE (mode, x); 6742 } 6743 6744 6745 /* Generate code to load VALS, which is a PARALLEL containing only 6746 constants (for vec_init) or CONST_VECTOR, efficiently into a 6747 register. Returns an RTX to copy into the register, or NULL_RTX 6748 for a PARALLEL that can not be converted into a CONST_VECTOR. */ 6749 static rtx 6750 aarch64_simd_make_constant (rtx vals) 6751 { 6752 enum machine_mode mode = GET_MODE (vals); 6753 rtx const_dup; 6754 rtx const_vec = NULL_RTX; 6755 int n_elts = GET_MODE_NUNITS (mode); 6756 int n_const = 0; 6757 int i; 6758 6759 if (GET_CODE (vals) == CONST_VECTOR) 6760 const_vec = vals; 6761 else if (GET_CODE (vals) == PARALLEL) 6762 { 6763 /* A CONST_VECTOR must contain only CONST_INTs and 6764 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). 6765 Only store valid constants in a CONST_VECTOR. */ 6766 for (i = 0; i < n_elts; ++i) 6767 { 6768 rtx x = XVECEXP (vals, 0, i); 6769 if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) 6770 n_const++; 6771 } 6772 if (n_const == n_elts) 6773 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); 6774 } 6775 else 6776 gcc_unreachable (); 6777 6778 if (const_vec != NULL_RTX 6779 && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL, 6780 NULL, NULL, NULL)) 6781 /* Load using MOVI/MVNI. */ 6782 return const_vec; 6783 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX) 6784 /* Loaded using DUP. */ 6785 return const_dup; 6786 else if (const_vec != NULL_RTX) 6787 /* Load from constant pool. We can not take advantage of single-cycle 6788 LD1 because we need a PC-relative addressing mode. */ 6789 return const_vec; 6790 else 6791 /* A PARALLEL containing something not valid inside CONST_VECTOR. 6792 We can not construct an initializer. 
*/ 6793 return NULL_RTX; 6794 } 6795 6796 void 6797 aarch64_expand_vector_init (rtx target, rtx vals) 6798 { 6799 enum machine_mode mode = GET_MODE (target); 6800 enum machine_mode inner_mode = GET_MODE_INNER (mode); 6801 int n_elts = GET_MODE_NUNITS (mode); 6802 int n_var = 0, one_var = -1; 6803 bool all_same = true; 6804 rtx x, mem; 6805 int i; 6806 6807 x = XVECEXP (vals, 0, 0); 6808 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) 6809 n_var = 1, one_var = 0; 6810 6811 for (i = 1; i < n_elts; ++i) 6812 { 6813 x = XVECEXP (vals, 0, i); 6814 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) 6815 ++n_var, one_var = i; 6816 6817 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0))) 6818 all_same = false; 6819 } 6820 6821 if (n_var == 0) 6822 { 6823 rtx constant = aarch64_simd_make_constant (vals); 6824 if (constant != NULL_RTX) 6825 { 6826 emit_move_insn (target, constant); 6827 return; 6828 } 6829 } 6830 6831 /* Splat a single non-constant element if we can. */ 6832 if (all_same) 6833 { 6834 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); 6835 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x)); 6836 return; 6837 } 6838 6839 /* One field is non-constant. Load constant then overwrite varying 6840 field. This is more efficient than using the stack. */ 6841 if (n_var == 1) 6842 { 6843 rtx copy = copy_rtx (vals); 6844 rtx index = GEN_INT (one_var); 6845 enum insn_code icode; 6846 6847 /* Load constant part of vector, substitute neighboring value for 6848 varying element. */ 6849 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1); 6850 aarch64_expand_vector_init (target, copy); 6851 6852 /* Insert variable. */ 6853 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); 6854 icode = optab_handler (vec_set_optab, mode); 6855 gcc_assert (icode != CODE_FOR_nothing); 6856 emit_insn (GEN_FCN (icode) (target, x, index)); 6857 return; 6858 } 6859 6860 /* Construct the vector in memory one field at a time 6861 and load the whole vector. */ 6862 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); 6863 for (i = 0; i < n_elts; i++) 6864 emit_move_insn (adjust_address_nv (mem, inner_mode, 6865 i * GET_MODE_SIZE (inner_mode)), 6866 XVECEXP (vals, 0, i)); 6867 emit_move_insn (target, mem); 6868 6869 } 6870 6871 static unsigned HOST_WIDE_INT 6872 aarch64_shift_truncation_mask (enum machine_mode mode) 6873 { 6874 return 6875 (aarch64_vector_mode_supported_p (mode) 6876 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1); 6877 } 6878 6879 #ifndef TLS_SECTION_ASM_FLAG 6880 #define TLS_SECTION_ASM_FLAG 'T' 6881 #endif 6882 6883 void 6884 aarch64_elf_asm_named_section (const char *name, unsigned int flags, 6885 tree decl ATTRIBUTE_UNUSED) 6886 { 6887 char flagchars[10], *f = flagchars; 6888 6889 /* If we have already declared this section, we can use an 6890 abbreviated form to switch back to it -- unless this section is 6891 part of a COMDAT groups, in which case GAS requires the full 6892 declaration every time. 
*/ 6893 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE)) 6894 && (flags & SECTION_DECLARED)) 6895 { 6896 fprintf (asm_out_file, "\t.section\t%s\n", name); 6897 return; 6898 } 6899 6900 if (!(flags & SECTION_DEBUG)) 6901 *f++ = 'a'; 6902 if (flags & SECTION_WRITE) 6903 *f++ = 'w'; 6904 if (flags & SECTION_CODE) 6905 *f++ = 'x'; 6906 if (flags & SECTION_SMALL) 6907 *f++ = 's'; 6908 if (flags & SECTION_MERGE) 6909 *f++ = 'M'; 6910 if (flags & SECTION_STRINGS) 6911 *f++ = 'S'; 6912 if (flags & SECTION_TLS) 6913 *f++ = TLS_SECTION_ASM_FLAG; 6914 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE)) 6915 *f++ = 'G'; 6916 *f = '\0'; 6917 6918 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars); 6919 6920 if (!(flags & SECTION_NOTYPE)) 6921 { 6922 const char *type; 6923 const char *format; 6924 6925 if (flags & SECTION_BSS) 6926 type = "nobits"; 6927 else 6928 type = "progbits"; 6929 6930 #ifdef TYPE_OPERAND_FMT 6931 format = "," TYPE_OPERAND_FMT; 6932 #else 6933 format = ",@%s"; 6934 #endif 6935 6936 fprintf (asm_out_file, format, type); 6937 6938 if (flags & SECTION_ENTSIZE) 6939 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE); 6940 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE)) 6941 { 6942 if (TREE_CODE (decl) == IDENTIFIER_NODE) 6943 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl)); 6944 else 6945 fprintf (asm_out_file, ",%s,comdat", 6946 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl))); 6947 } 6948 } 6949 6950 putc ('\n', asm_out_file); 6951 } 6952 6953 /* Select a format to encode pointers in exception handling data. */ 6954 int 6955 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global) 6956 { 6957 int type; 6958 switch (aarch64_cmodel) 6959 { 6960 case AARCH64_CMODEL_TINY: 6961 case AARCH64_CMODEL_TINY_PIC: 6962 case AARCH64_CMODEL_SMALL: 6963 case AARCH64_CMODEL_SMALL_PIC: 6964 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient 6965 for everything. */ 6966 type = DW_EH_PE_sdata4; 6967 break; 6968 default: 6969 /* No assumptions here. 8-byte relocs required. */ 6970 type = DW_EH_PE_sdata8; 6971 break; 6972 } 6973 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; 6974 } 6975 6976 /* Emit load exclusive. */ 6977 6978 static void 6979 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval, 6980 rtx mem, rtx model_rtx) 6981 { 6982 rtx (*gen) (rtx, rtx, rtx); 6983 6984 switch (mode) 6985 { 6986 case QImode: gen = gen_aarch64_load_exclusiveqi; break; 6987 case HImode: gen = gen_aarch64_load_exclusivehi; break; 6988 case SImode: gen = gen_aarch64_load_exclusivesi; break; 6989 case DImode: gen = gen_aarch64_load_exclusivedi; break; 6990 default: 6991 gcc_unreachable (); 6992 } 6993 6994 emit_insn (gen (rval, mem, model_rtx)); 6995 } 6996 6997 /* Emit store exclusive. */ 6998 6999 static void 7000 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval, 7001 rtx rval, rtx mem, rtx model_rtx) 7002 { 7003 rtx (*gen) (rtx, rtx, rtx, rtx); 7004 7005 switch (mode) 7006 { 7007 case QImode: gen = gen_aarch64_store_exclusiveqi; break; 7008 case HImode: gen = gen_aarch64_store_exclusivehi; break; 7009 case SImode: gen = gen_aarch64_store_exclusivesi; break; 7010 case DImode: gen = gen_aarch64_store_exclusivedi; break; 7011 default: 7012 gcc_unreachable (); 7013 } 7014 7015 emit_insn (gen (bval, rval, mem, model_rtx)); 7016 } 7017 7018 /* Mark the previous jump instruction as unlikely. 
*/ 7019 7020 static void 7021 aarch64_emit_unlikely_jump (rtx insn) 7022 { 7023 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1); 7024 7025 insn = emit_jump_insn (insn); 7026 add_reg_note (insn, REG_BR_PROB, very_unlikely); 7027 } 7028 7029 /* Expand a compare and swap pattern. */ 7030 7031 void 7032 aarch64_expand_compare_and_swap (rtx operands[]) 7033 { 7034 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; 7035 enum machine_mode mode, cmp_mode; 7036 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); 7037 7038 bval = operands[0]; 7039 rval = operands[1]; 7040 mem = operands[2]; 7041 oldval = operands[3]; 7042 newval = operands[4]; 7043 is_weak = operands[5]; 7044 mod_s = operands[6]; 7045 mod_f = operands[7]; 7046 mode = GET_MODE (mem); 7047 cmp_mode = mode; 7048 7049 /* Normally the succ memory model must be stronger than fail, but in the 7050 unlikely event of fail being ACQUIRE and succ being RELEASE we need to 7051 promote succ to ACQ_REL so that we don't lose the acquire semantics. */ 7052 7053 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE 7054 && INTVAL (mod_s) == MEMMODEL_RELEASE) 7055 mod_s = GEN_INT (MEMMODEL_ACQ_REL); 7056 7057 switch (mode) 7058 { 7059 case QImode: 7060 case HImode: 7061 /* For short modes, we're going to perform the comparison in SImode, 7062 so do the zero-extension now. */ 7063 cmp_mode = SImode; 7064 rval = gen_reg_rtx (SImode); 7065 oldval = convert_modes (SImode, mode, oldval, true); 7066 /* Fall through. */ 7067 7068 case SImode: 7069 case DImode: 7070 /* Force the value into a register if needed. */ 7071 if (!aarch64_plus_operand (oldval, mode)) 7072 oldval = force_reg (cmp_mode, oldval); 7073 break; 7074 7075 default: 7076 gcc_unreachable (); 7077 } 7078 7079 switch (mode) 7080 { 7081 case QImode: gen = gen_atomic_compare_and_swapqi_1; break; 7082 case HImode: gen = gen_atomic_compare_and_swaphi_1; break; 7083 case SImode: gen = gen_atomic_compare_and_swapsi_1; break; 7084 case DImode: gen = gen_atomic_compare_and_swapdi_1; break; 7085 default: 7086 gcc_unreachable (); 7087 } 7088 7089 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f)); 7090 7091 if (mode == QImode || mode == HImode) 7092 emit_move_insn (operands[1], gen_lowpart (mode, rval)); 7093 7094 x = gen_rtx_REG (CCmode, CC_REGNUM); 7095 x = gen_rtx_EQ (SImode, x, const0_rtx); 7096 emit_insn (gen_rtx_SET (VOIDmode, bval, x)); 7097 } 7098 7099 /* Split a compare and swap pattern. 
*/ 7100 7101 void 7102 aarch64_split_compare_and_swap (rtx operands[]) 7103 { 7104 rtx rval, mem, oldval, newval, scratch; 7105 enum machine_mode mode; 7106 bool is_weak; 7107 rtx label1, label2, x, cond; 7108 7109 rval = operands[0]; 7110 mem = operands[1]; 7111 oldval = operands[2]; 7112 newval = operands[3]; 7113 is_weak = (operands[4] != const0_rtx); 7114 scratch = operands[7]; 7115 mode = GET_MODE (mem); 7116 7117 label1 = NULL_RTX; 7118 if (!is_weak) 7119 { 7120 label1 = gen_label_rtx (); 7121 emit_label (label1); 7122 } 7123 label2 = gen_label_rtx (); 7124 7125 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]); 7126 7127 cond = aarch64_gen_compare_reg (NE, rval, oldval); 7128 x = gen_rtx_NE (VOIDmode, cond, const0_rtx); 7129 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, 7130 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); 7131 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); 7132 7133 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]); 7134 7135 if (!is_weak) 7136 { 7137 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx); 7138 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, 7139 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); 7140 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); 7141 } 7142 else 7143 { 7144 cond = gen_rtx_REG (CCmode, CC_REGNUM); 7145 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx); 7146 emit_insn (gen_rtx_SET (VOIDmode, cond, x)); 7147 } 7148 7149 emit_label (label2); 7150 } 7151 7152 /* Split an atomic operation. */ 7153 7154 void 7155 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, 7156 rtx value, rtx model_rtx, rtx cond) 7157 { 7158 enum machine_mode mode = GET_MODE (mem); 7159 enum machine_mode wmode = (mode == DImode ? DImode : SImode); 7160 rtx label, x; 7161 7162 label = gen_label_rtx (); 7163 emit_label (label); 7164 7165 if (new_out) 7166 new_out = gen_lowpart (wmode, new_out); 7167 if (old_out) 7168 old_out = gen_lowpart (wmode, old_out); 7169 else 7170 old_out = new_out; 7171 value = simplify_gen_subreg (wmode, value, mode, 0); 7172 7173 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx); 7174 7175 switch (code) 7176 { 7177 case SET: 7178 new_out = value; 7179 break; 7180 7181 case NOT: 7182 x = gen_rtx_AND (wmode, old_out, value); 7183 emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); 7184 x = gen_rtx_NOT (wmode, new_out); 7185 emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); 7186 break; 7187 7188 case MINUS: 7189 if (CONST_INT_P (value)) 7190 { 7191 value = GEN_INT (-INTVAL (value)); 7192 code = PLUS; 7193 } 7194 /* Fall through. 
*/ 7195 7196 default: 7197 x = gen_rtx_fmt_ee (code, wmode, old_out, value); 7198 emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); 7199 break; 7200 } 7201 7202 aarch64_emit_store_exclusive (mode, cond, mem, 7203 gen_lowpart (mode, new_out), model_rtx); 7204 7205 x = gen_rtx_NE (VOIDmode, cond, const0_rtx); 7206 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, 7207 gen_rtx_LABEL_REF (Pmode, label), pc_rtx); 7208 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); 7209 } 7210 7211 static void 7212 aarch64_print_extension (void) 7213 { 7214 const struct aarch64_option_extension *opt = NULL; 7215 7216 for (opt = all_extensions; opt->name != NULL; opt++) 7217 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on) 7218 asm_fprintf (asm_out_file, "+%s", opt->name); 7219 7220 asm_fprintf (asm_out_file, "\n"); 7221 } 7222 7223 static void 7224 aarch64_start_file (void) 7225 { 7226 if (selected_arch) 7227 { 7228 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name); 7229 aarch64_print_extension (); 7230 } 7231 else if (selected_cpu) 7232 { 7233 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name); 7234 aarch64_print_extension (); 7235 } 7236 default_file_start(); 7237 } 7238 7239 /* Target hook for c_mode_for_suffix. */ 7240 static enum machine_mode 7241 aarch64_c_mode_for_suffix (char suffix) 7242 { 7243 if (suffix == 'q') 7244 return TFmode; 7245 7246 return VOIDmode; 7247 } 7248 7249 /* We can only represent floating point constants which will fit in 7250 "quarter-precision" values. These values are characterised by 7251 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given 7252 by: 7253 7254 (-1)^s * (n/16) * 2^r 7255 7256 Where: 7257 's' is the sign bit. 7258 'n' is an integer in the range 16 <= n <= 31. 7259 'r' is an integer in the range -3 <= r <= 4. */ 7260 7261 /* Return true iff X can be represented by a quarter-precision 7262 floating point immediate operand X. Note, we cannot represent 0.0. */ 7263 bool 7264 aarch64_float_const_representable_p (rtx x) 7265 { 7266 /* This represents our current view of how many bits 7267 make up the mantissa. */ 7268 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1; 7269 int exponent; 7270 unsigned HOST_WIDE_INT mantissa, mask; 7271 HOST_WIDE_INT m1, m2; 7272 REAL_VALUE_TYPE r, m; 7273 7274 if (!CONST_DOUBLE_P (x)) 7275 return false; 7276 7277 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 7278 7279 /* We cannot represent infinities, NaNs or +/-zero. We won't 7280 know if we have +zero until we analyse the mantissa, but we 7281 can reject the other invalid values. */ 7282 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) 7283 || REAL_VALUE_MINUS_ZERO (r)) 7284 return false; 7285 7286 /* Extract exponent. */ 7287 r = real_value_abs (&r); 7288 exponent = REAL_EXP (&r); 7289 7290 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the 7291 highest (sign) bit, with a fixed binary point at bit point_pos. 7292 m1 holds the low part of the mantissa, m2 the high part. 7293 WARNING: If we ever have a representation using more than 2 * H_W_I - 1 7294 bits for the mantissa, this can fail (low bits will be lost). */ 7295 real_ldexp (&m, &r, point_pos - exponent); 7296 REAL_VALUE_TO_INT (&m1, &m2, m); 7297 7298 /* If the low part of the mantissa has bits set we cannot represent 7299 the value. */ 7300 if (m1 != 0) 7301 return false; 7302 /* We have rejected the lower HOST_WIDE_INT, so update our 7303 understanding of how many bits lie in the mantissa and 7304 look only at the high HOST_WIDE_INT. 
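(As a concrete instance of the form described above: 0.171875
= (22/16) * 2^-3, i.e. n = 22 and r = -3, passes all of the checks below,
whereas a value such as 1.0/3.0 has mantissa bits beyond the leading five
and is rejected by the mask test.)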
*/ 7305 mantissa = m2; 7306 point_pos -= HOST_BITS_PER_WIDE_INT; 7307 7308 /* We can only represent values with a mantissa of the form 1.xxxx. */ 7309 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1; 7310 if ((mantissa & mask) != 0) 7311 return false; 7312 7313 /* Having filtered unrepresentable values, we may now remove all 7314 but the highest 5 bits. */ 7315 mantissa >>= point_pos - 5; 7316 7317 /* We cannot represent the value 0.0, so reject it. This is handled 7318 elsewhere. */ 7319 if (mantissa == 0) 7320 return false; 7321 7322 /* Then, as bit 4 is always set, we can mask it off, leaving 7323 the mantissa in the range [0, 15]. */ 7324 mantissa &= ~(1 << 4); 7325 gcc_assert (mantissa <= 15); 7326 7327 /* GCC internally does not use IEEE754-like encoding (where normalized 7328 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c). 7329 Our mantissa values are shifted 4 places to the left relative to 7330 normalized IEEE754 so we must modify the exponent returned by REAL_EXP 7331 by 5 places to correct for GCC's representation. */ 7332 exponent = 5 - exponent; 7333 7334 return (exponent >= 0 && exponent <= 7); 7335 } 7336 7337 char* 7338 aarch64_output_simd_mov_immediate (rtx *const_vector, 7339 enum machine_mode mode, 7340 unsigned width) 7341 { 7342 int is_valid; 7343 unsigned char widthc; 7344 int lane_width_bits; 7345 static char templ[40]; 7346 int shift = 0, mvn = 0; 7347 const char *mnemonic; 7348 unsigned int lane_count = 0; 7349 7350 is_valid = 7351 aarch64_simd_immediate_valid_for_move (*const_vector, mode, 7352 const_vector, &lane_width_bits, 7353 &widthc, &mvn, &shift); 7354 gcc_assert (is_valid); 7355 7356 mode = GET_MODE_INNER (mode); 7357 if (mode == SFmode || mode == DFmode) 7358 { 7359 bool zero_p = 7360 aarch64_float_const_zero_rtx_p (*const_vector); 7361 gcc_assert (shift == 0); 7362 mnemonic = zero_p ? "movi" : "fmov"; 7363 } 7364 else 7365 mnemonic = mvn ? "mvni" : "movi"; 7366 7367 gcc_assert (lane_width_bits != 0); 7368 lane_count = width / lane_width_bits; 7369 7370 if (lane_count == 1) 7371 snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic); 7372 else if (shift) 7373 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d", 7374 mnemonic, lane_count, widthc, shift); 7375 else 7376 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1", 7377 mnemonic, lane_count, widthc); 7378 return templ; 7379 } 7380 7381 /* Split operands into moves from op[1] + op[2] into op[0]. */ 7382 7383 void 7384 aarch64_split_combinev16qi (rtx operands[3]) 7385 { 7386 unsigned int dest = REGNO (operands[0]); 7387 unsigned int src1 = REGNO (operands[1]); 7388 unsigned int src2 = REGNO (operands[2]); 7389 enum machine_mode halfmode = GET_MODE (operands[1]); 7390 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode); 7391 rtx destlo, desthi; 7392 7393 gcc_assert (halfmode == V16QImode); 7394 7395 if (src1 == dest && src2 == dest + halfregs) 7396 { 7397 /* No-op move. Can't split to nothing; emit something. */ 7398 emit_note (NOTE_INSN_DELETED); 7399 return; 7400 } 7401 7402 /* Preserve register attributes for variable tracking. */ 7403 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0); 7404 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs, 7405 GET_MODE_SIZE (halfmode)); 7406 7407 /* Special case of reversed high/low parts. 
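The three EORs emitted below swap the two halves in place without a
scratch register (the usual a ^= b; b ^= a; a ^= b idiom).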
*/ 7408 if (reg_overlap_mentioned_p (operands[2], destlo) 7409 && reg_overlap_mentioned_p (operands[1], desthi)) 7410 { 7411 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2])); 7412 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2])); 7413 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2])); 7414 } 7415 else if (!reg_overlap_mentioned_p (operands[2], destlo)) 7416 { 7417 /* Try to avoid unnecessary moves if part of the result 7418 is in the right place already. */ 7419 if (src1 != dest) 7420 emit_move_insn (destlo, operands[1]); 7421 if (src2 != dest + halfregs) 7422 emit_move_insn (desthi, operands[2]); 7423 } 7424 else 7425 { 7426 if (src2 != dest + halfregs) 7427 emit_move_insn (desthi, operands[2]); 7428 if (src1 != dest) 7429 emit_move_insn (destlo, operands[1]); 7430 } 7431 } 7432 7433 /* vec_perm support. */ 7434 7435 #define MAX_VECT_LEN 16 7436 7437 struct expand_vec_perm_d 7438 { 7439 rtx target, op0, op1; 7440 unsigned char perm[MAX_VECT_LEN]; 7441 enum machine_mode vmode; 7442 unsigned char nelt; 7443 bool one_vector_p; 7444 bool testing_p; 7445 }; 7446 7447 /* Generate a variable permutation. */ 7448 7449 static void 7450 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel) 7451 { 7452 enum machine_mode vmode = GET_MODE (target); 7453 bool one_vector_p = rtx_equal_p (op0, op1); 7454 7455 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode); 7456 gcc_checking_assert (GET_MODE (op0) == vmode); 7457 gcc_checking_assert (GET_MODE (op1) == vmode); 7458 gcc_checking_assert (GET_MODE (sel) == vmode); 7459 gcc_checking_assert (TARGET_SIMD); 7460 7461 if (one_vector_p) 7462 { 7463 if (vmode == V8QImode) 7464 { 7465 /* Expand the argument to a V16QI mode by duplicating it. */ 7466 rtx pair = gen_reg_rtx (V16QImode); 7467 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0)); 7468 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel)); 7469 } 7470 else 7471 { 7472 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel)); 7473 } 7474 } 7475 else 7476 { 7477 rtx pair; 7478 7479 if (vmode == V8QImode) 7480 { 7481 pair = gen_reg_rtx (V16QImode); 7482 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1)); 7483 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel)); 7484 } 7485 else 7486 { 7487 pair = gen_reg_rtx (OImode); 7488 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1)); 7489 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel)); 7490 } 7491 } 7492 } 7493 7494 void 7495 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) 7496 { 7497 enum machine_mode vmode = GET_MODE (target); 7498 unsigned int i, nelt = GET_MODE_NUNITS (vmode); 7499 bool one_vector_p = rtx_equal_p (op0, op1); 7500 rtx rmask[MAX_VECT_LEN], mask; 7501 7502 gcc_checking_assert (!BYTES_BIG_ENDIAN); 7503 7504 /* The TBL instruction does not use a modulo index, so we must take care 7505 of that ourselves. */ 7506 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1); 7507 for (i = 0; i < nelt; ++i) 7508 rmask[i] = mask; 7509 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask)); 7510 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN); 7511 7512 aarch64_expand_vec_perm_1 (target, op0, op1, sel); 7513 } 7514 7515 /* Recognize patterns suitable for the TRN instructions. 
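TRN1/TRN2 interleave the even (respectively odd) numbered elements of the
two inputs; e.g. for V4SI, trn1 produces { a0, b0, a2, b2 } and trn2
produces { a1, b1, a3, b3 }, so (in little-endian element numbering)
selectors such as { 0, 4, 2, 6 } and { 1, 5, 3, 7 } are matched here.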
*/ 7516 static bool 7517 aarch64_evpc_trn (struct expand_vec_perm_d *d) 7518 { 7519 unsigned int i, odd, mask, nelt = d->nelt; 7520 rtx out, in0, in1, x; 7521 rtx (*gen) (rtx, rtx, rtx); 7522 enum machine_mode vmode = d->vmode; 7523 7524 if (GET_MODE_UNIT_SIZE (vmode) > 8) 7525 return false; 7526 7527 /* Note that these are little-endian tests. 7528 We correct for big-endian later. */ 7529 if (d->perm[0] == 0) 7530 odd = 0; 7531 else if (d->perm[0] == 1) 7532 odd = 1; 7533 else 7534 return false; 7535 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); 7536 7537 for (i = 0; i < nelt; i += 2) 7538 { 7539 if (d->perm[i] != i + odd) 7540 return false; 7541 if (d->perm[i + 1] != ((i + nelt + odd) & mask)) 7542 return false; 7543 } 7544 7545 /* Success! */ 7546 if (d->testing_p) 7547 return true; 7548 7549 in0 = d->op0; 7550 in1 = d->op1; 7551 if (BYTES_BIG_ENDIAN) 7552 { 7553 x = in0, in0 = in1, in1 = x; 7554 odd = !odd; 7555 } 7556 out = d->target; 7557 7558 if (odd) 7559 { 7560 switch (vmode) 7561 { 7562 case V16QImode: gen = gen_aarch64_trn2v16qi; break; 7563 case V8QImode: gen = gen_aarch64_trn2v8qi; break; 7564 case V8HImode: gen = gen_aarch64_trn2v8hi; break; 7565 case V4HImode: gen = gen_aarch64_trn2v4hi; break; 7566 case V4SImode: gen = gen_aarch64_trn2v4si; break; 7567 case V2SImode: gen = gen_aarch64_trn2v2si; break; 7568 case V2DImode: gen = gen_aarch64_trn2v2di; break; 7569 case V4SFmode: gen = gen_aarch64_trn2v4sf; break; 7570 case V2SFmode: gen = gen_aarch64_trn2v2sf; break; 7571 case V2DFmode: gen = gen_aarch64_trn2v2df; break; 7572 default: 7573 return false; 7574 } 7575 } 7576 else 7577 { 7578 switch (vmode) 7579 { 7580 case V16QImode: gen = gen_aarch64_trn1v16qi; break; 7581 case V8QImode: gen = gen_aarch64_trn1v8qi; break; 7582 case V8HImode: gen = gen_aarch64_trn1v8hi; break; 7583 case V4HImode: gen = gen_aarch64_trn1v4hi; break; 7584 case V4SImode: gen = gen_aarch64_trn1v4si; break; 7585 case V2SImode: gen = gen_aarch64_trn1v2si; break; 7586 case V2DImode: gen = gen_aarch64_trn1v2di; break; 7587 case V4SFmode: gen = gen_aarch64_trn1v4sf; break; 7588 case V2SFmode: gen = gen_aarch64_trn1v2sf; break; 7589 case V2DFmode: gen = gen_aarch64_trn1v2df; break; 7590 default: 7591 return false; 7592 } 7593 } 7594 7595 emit_insn (gen (out, in0, in1)); 7596 return true; 7597 } 7598 7599 /* Recognize patterns suitable for the UZP instructions. */ 7600 static bool 7601 aarch64_evpc_uzp (struct expand_vec_perm_d *d) 7602 { 7603 unsigned int i, odd, mask, nelt = d->nelt; 7604 rtx out, in0, in1, x; 7605 rtx (*gen) (rtx, rtx, rtx); 7606 enum machine_mode vmode = d->vmode; 7607 7608 if (GET_MODE_UNIT_SIZE (vmode) > 8) 7609 return false; 7610 7611 /* Note that these are little-endian tests. 7612 We correct for big-endian later. */ 7613 if (d->perm[0] == 0) 7614 odd = 0; 7615 else if (d->perm[0] == 1) 7616 odd = 1; 7617 else 7618 return false; 7619 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); 7620 7621 for (i = 0; i < nelt; i++) 7622 { 7623 unsigned elt = (i * 2 + odd) & mask; 7624 if (d->perm[i] != elt) 7625 return false; 7626 } 7627 7628 /* Success! 
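(For V4SI, for example, the even-element selector { 0, 2, 4, 6 } reaches
this point and is emitted as uzp1, and { 1, 3, 5, 7 } as uzp2.)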
*/ 7629 if (d->testing_p) 7630 return true; 7631 7632 in0 = d->op0; 7633 in1 = d->op1; 7634 if (BYTES_BIG_ENDIAN) 7635 { 7636 x = in0, in0 = in1, in1 = x; 7637 odd = !odd; 7638 } 7639 out = d->target; 7640 7641 if (odd) 7642 { 7643 switch (vmode) 7644 { 7645 case V16QImode: gen = gen_aarch64_uzp2v16qi; break; 7646 case V8QImode: gen = gen_aarch64_uzp2v8qi; break; 7647 case V8HImode: gen = gen_aarch64_uzp2v8hi; break; 7648 case V4HImode: gen = gen_aarch64_uzp2v4hi; break; 7649 case V4SImode: gen = gen_aarch64_uzp2v4si; break; 7650 case V2SImode: gen = gen_aarch64_uzp2v2si; break; 7651 case V2DImode: gen = gen_aarch64_uzp2v2di; break; 7652 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break; 7653 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break; 7654 case V2DFmode: gen = gen_aarch64_uzp2v2df; break; 7655 default: 7656 return false; 7657 } 7658 } 7659 else 7660 { 7661 switch (vmode) 7662 { 7663 case V16QImode: gen = gen_aarch64_uzp1v16qi; break; 7664 case V8QImode: gen = gen_aarch64_uzp1v8qi; break; 7665 case V8HImode: gen = gen_aarch64_uzp1v8hi; break; 7666 case V4HImode: gen = gen_aarch64_uzp1v4hi; break; 7667 case V4SImode: gen = gen_aarch64_uzp1v4si; break; 7668 case V2SImode: gen = gen_aarch64_uzp1v2si; break; 7669 case V2DImode: gen = gen_aarch64_uzp1v2di; break; 7670 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break; 7671 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break; 7672 case V2DFmode: gen = gen_aarch64_uzp1v2df; break; 7673 default: 7674 return false; 7675 } 7676 } 7677 7678 emit_insn (gen (out, in0, in1)); 7679 return true; 7680 } 7681 7682 /* Recognize patterns suitable for the ZIP instructions. */ 7683 static bool 7684 aarch64_evpc_zip (struct expand_vec_perm_d *d) 7685 { 7686 unsigned int i, high, mask, nelt = d->nelt; 7687 rtx out, in0, in1, x; 7688 rtx (*gen) (rtx, rtx, rtx); 7689 enum machine_mode vmode = d->vmode; 7690 7691 if (GET_MODE_UNIT_SIZE (vmode) > 8) 7692 return false; 7693 7694 /* Note that these are little-endian tests. 7695 We correct for big-endian later. */ 7696 high = nelt / 2; 7697 if (d->perm[0] == high) 7698 /* Do Nothing. */ 7699 ; 7700 else if (d->perm[0] == 0) 7701 high = 0; 7702 else 7703 return false; 7704 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); 7705 7706 for (i = 0; i < nelt / 2; i++) 7707 { 7708 unsigned elt = (i + high) & mask; 7709 if (d->perm[i * 2] != elt) 7710 return false; 7711 elt = (elt + nelt) & mask; 7712 if (d->perm[i * 2 + 1] != elt) 7713 return false; 7714 } 7715 7716 /* Success! 
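(For V4SI, for example, { 0, 4, 1, 5 } reaches this point and is emitted
as zip1, and { 2, 6, 3, 7 } as zip2.)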
*/ 7717 if (d->testing_p) 7718 return true; 7719 7720 in0 = d->op0; 7721 in1 = d->op1; 7722 if (BYTES_BIG_ENDIAN) 7723 { 7724 x = in0, in0 = in1, in1 = x; 7725 high = !high; 7726 } 7727 out = d->target; 7728 7729 if (high) 7730 { 7731 switch (vmode) 7732 { 7733 case V16QImode: gen = gen_aarch64_zip2v16qi; break; 7734 case V8QImode: gen = gen_aarch64_zip2v8qi; break; 7735 case V8HImode: gen = gen_aarch64_zip2v8hi; break; 7736 case V4HImode: gen = gen_aarch64_zip2v4hi; break; 7737 case V4SImode: gen = gen_aarch64_zip2v4si; break; 7738 case V2SImode: gen = gen_aarch64_zip2v2si; break; 7739 case V2DImode: gen = gen_aarch64_zip2v2di; break; 7740 case V4SFmode: gen = gen_aarch64_zip2v4sf; break; 7741 case V2SFmode: gen = gen_aarch64_zip2v2sf; break; 7742 case V2DFmode: gen = gen_aarch64_zip2v2df; break; 7743 default: 7744 return false; 7745 } 7746 } 7747 else 7748 { 7749 switch (vmode) 7750 { 7751 case V16QImode: gen = gen_aarch64_zip1v16qi; break; 7752 case V8QImode: gen = gen_aarch64_zip1v8qi; break; 7753 case V8HImode: gen = gen_aarch64_zip1v8hi; break; 7754 case V4HImode: gen = gen_aarch64_zip1v4hi; break; 7755 case V4SImode: gen = gen_aarch64_zip1v4si; break; 7756 case V2SImode: gen = gen_aarch64_zip1v2si; break; 7757 case V2DImode: gen = gen_aarch64_zip1v2di; break; 7758 case V4SFmode: gen = gen_aarch64_zip1v4sf; break; 7759 case V2SFmode: gen = gen_aarch64_zip1v2sf; break; 7760 case V2DFmode: gen = gen_aarch64_zip1v2df; break; 7761 default: 7762 return false; 7763 } 7764 } 7765 7766 emit_insn (gen (out, in0, in1)); 7767 return true; 7768 } 7769 7770 static bool 7771 aarch64_evpc_tbl (struct expand_vec_perm_d *d) 7772 { 7773 rtx rperm[MAX_VECT_LEN], sel; 7774 enum machine_mode vmode = d->vmode; 7775 unsigned int i, nelt = d->nelt; 7776 7777 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's 7778 numbering of elements for big-endian, we must reverse the order. */ 7779 if (BYTES_BIG_ENDIAN) 7780 return false; 7781 7782 if (d->testing_p) 7783 return true; 7784 7785 /* Generic code will try constant permutation twice. Once with the 7786 original mode and again with the elements lowered to QImode. 7787 So wait and don't do the selector expansion ourselves. */ 7788 if (vmode != V8QImode && vmode != V16QImode) 7789 return false; 7790 7791 for (i = 0; i < nelt; ++i) 7792 rperm[i] = GEN_INT (d->perm[i]); 7793 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); 7794 sel = force_reg (vmode, sel); 7795 7796 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel); 7797 return true; 7798 } 7799 7800 static bool 7801 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) 7802 { 7803 /* The pattern matching functions above are written to look for a small 7804 number to begin the sequence (0, 1, N/2). If we begin with an index 7805 from the second operand, we can swap the operands. */ 7806 if (d->perm[0] >= d->nelt) 7807 { 7808 unsigned i, nelt = d->nelt; 7809 rtx x; 7810 7811 for (i = 0; i < nelt; ++i) 7812 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1); 7813 7814 x = d->op0; 7815 d->op0 = d->op1; 7816 d->op1 = x; 7817 } 7818 7819 if (TARGET_SIMD) 7820 { 7821 if (aarch64_evpc_zip (d)) 7822 return true; 7823 else if (aarch64_evpc_uzp (d)) 7824 return true; 7825 else if (aarch64_evpc_trn (d)) 7826 return true; 7827 return aarch64_evpc_tbl (d); 7828 } 7829 return false; 7830 } 7831 7832 /* Expand a vec_perm_const pattern. 
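The driver below records, in WHICH, whether the selector references OP0
(bit 0), OP1 (bit 1) or both; single-input permutations are canonicalized
onto one operand, and then the ZIP, UZP and TRN recognizers are tried
before falling back to a TBL lookup.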
*/ 7833 7834 bool 7835 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) 7836 { 7837 struct expand_vec_perm_d d; 7838 int i, nelt, which; 7839 7840 d.target = target; 7841 d.op0 = op0; 7842 d.op1 = op1; 7843 7844 d.vmode = GET_MODE (target); 7845 gcc_assert (VECTOR_MODE_P (d.vmode)); 7846 d.nelt = nelt = GET_MODE_NUNITS (d.vmode); 7847 d.testing_p = false; 7848 7849 for (i = which = 0; i < nelt; ++i) 7850 { 7851 rtx e = XVECEXP (sel, 0, i); 7852 int ei = INTVAL (e) & (2 * nelt - 1); 7853 which |= (ei < nelt ? 1 : 2); 7854 d.perm[i] = ei; 7855 } 7856 7857 switch (which) 7858 { 7859 default: 7860 gcc_unreachable (); 7861 7862 case 3: 7863 d.one_vector_p = false; 7864 if (!rtx_equal_p (op0, op1)) 7865 break; 7866 7867 /* The elements of PERM do not suggest that only the first operand 7868 is used, but both operands are identical. Allow easier matching 7869 of the permutation by folding the permutation into the single 7870 input vector. */ 7871 /* Fall Through. */ 7872 case 2: 7873 for (i = 0; i < nelt; ++i) 7874 d.perm[i] &= nelt - 1; 7875 d.op0 = op1; 7876 d.one_vector_p = true; 7877 break; 7878 7879 case 1: 7880 d.op1 = op0; 7881 d.one_vector_p = true; 7882 break; 7883 } 7884 7885 return aarch64_expand_vec_perm_const_1 (&d); 7886 } 7887 7888 static bool 7889 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, 7890 const unsigned char *sel) 7891 { 7892 struct expand_vec_perm_d d; 7893 unsigned int i, nelt, which; 7894 bool ret; 7895 7896 d.vmode = vmode; 7897 d.nelt = nelt = GET_MODE_NUNITS (d.vmode); 7898 d.testing_p = true; 7899 memcpy (d.perm, sel, nelt); 7900 7901 /* Calculate whether all elements are in one vector. */ 7902 for (i = which = 0; i < nelt; ++i) 7903 { 7904 unsigned char e = d.perm[i]; 7905 gcc_assert (e < 2 * nelt); 7906 which |= (e < nelt ? 1 : 2); 7907 } 7908 7909 /* If all elements are from the second vector, reindex as if from the 7910 first vector. */ 7911 if (which == 2) 7912 for (i = 0; i < nelt; ++i) 7913 d.perm[i] -= nelt; 7914 7915 /* Check whether the mask can be applied to a single vector. */ 7916 d.one_vector_p = (which != 3); 7917 7918 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); 7919 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); 7920 if (!d.one_vector_p) 7921 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); 7922 7923 start_sequence (); 7924 ret = aarch64_expand_vec_perm_const_1 (&d); 7925 end_sequence (); 7926 7927 return ret; 7928 } 7929 7930 #undef TARGET_ADDRESS_COST 7931 #define TARGET_ADDRESS_COST aarch64_address_cost 7932 7933 /* This hook determines whether unnamed bitfields affect the alignment 7934 of the containing structure. The hook returns true if the structure 7935 should inherit the alignment requirements of an unnamed bitfield's 7936 type.
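For example, with this hook returning true, a struct such as
struct { char c; int : 4; } takes the alignment of int from the unnamed
bitfield.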
*/ 7937 #undef TARGET_ALIGN_ANON_BITFIELD 7938 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true 7939 7940 #undef TARGET_ASM_ALIGNED_DI_OP 7941 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t" 7942 7943 #undef TARGET_ASM_ALIGNED_HI_OP 7944 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" 7945 7946 #undef TARGET_ASM_ALIGNED_SI_OP 7947 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" 7948 7949 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 7950 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ 7951 hook_bool_const_tree_hwi_hwi_const_tree_true 7952 7953 #undef TARGET_ASM_FILE_START 7954 #define TARGET_ASM_FILE_START aarch64_start_file 7955 7956 #undef TARGET_ASM_OUTPUT_MI_THUNK 7957 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk 7958 7959 #undef TARGET_ASM_SELECT_RTX_SECTION 7960 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section 7961 7962 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE 7963 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template 7964 7965 #undef TARGET_BUILD_BUILTIN_VA_LIST 7966 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list 7967 7968 #undef TARGET_CALLEE_COPIES 7969 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false 7970 7971 #undef TARGET_CAN_ELIMINATE 7972 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate 7973 7974 #undef TARGET_CANNOT_FORCE_CONST_MEM 7975 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem 7976 7977 #undef TARGET_CONDITIONAL_REGISTER_USAGE 7978 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage 7979 7980 /* Only the least significant bit is used for initialization guard 7981 variables. */ 7982 #undef TARGET_CXX_GUARD_MASK_BIT 7983 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true 7984 7985 #undef TARGET_C_MODE_FOR_SUFFIX 7986 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix 7987 7988 #ifdef TARGET_BIG_ENDIAN_DEFAULT 7989 #undef TARGET_DEFAULT_TARGET_FLAGS 7990 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END) 7991 #endif 7992 7993 #undef TARGET_CLASS_MAX_NREGS 7994 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs 7995 7996 #undef TARGET_BUILTIN_DECL 7997 #define TARGET_BUILTIN_DECL aarch64_builtin_decl 7998 7999 #undef TARGET_EXPAND_BUILTIN 8000 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin 8001 8002 #undef TARGET_EXPAND_BUILTIN_VA_START 8003 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start 8004 8005 #undef TARGET_FUNCTION_ARG 8006 #define TARGET_FUNCTION_ARG aarch64_function_arg 8007 8008 #undef TARGET_FUNCTION_ARG_ADVANCE 8009 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance 8010 8011 #undef TARGET_FUNCTION_ARG_BOUNDARY 8012 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary 8013 8014 #undef TARGET_FUNCTION_OK_FOR_SIBCALL 8015 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall 8016 8017 #undef TARGET_FUNCTION_VALUE 8018 #define TARGET_FUNCTION_VALUE aarch64_function_value 8019 8020 #undef TARGET_FUNCTION_VALUE_REGNO_P 8021 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p 8022 8023 #undef TARGET_FRAME_POINTER_REQUIRED 8024 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required 8025 8026 #undef TARGET_GIMPLIFY_VA_ARG_EXPR 8027 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr 8028 8029 #undef TARGET_INIT_BUILTINS 8030 #define TARGET_INIT_BUILTINS aarch64_init_builtins 8031 8032 #undef TARGET_LEGITIMATE_ADDRESS_P 8033 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p 8034 8035 #undef 
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256
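/* Note added for clarity (an assumption, not from the original sources):
   -256 appears to correspond to the most negative immediate of the
   signed 9-bit unscaled load/store forms (LDUR/STUR), whose offset
   range is [-256, 255].  */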
/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"