/* Auxiliary functions for pipeline descriptions pattern of Andes
   NDS32 cpu for GNU compiler
   Copyright (C) 2012-2022 Free Software Foundation, Inc.
   Contributed by Andes Technology Corporation.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* ------------------------------------------------------------------------ */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "insn-attr.h"
#include "insn-codes.h"
#include "target.h"

#include "nds32-protos.h"

/* ------------------------------------------------------------------------ */

namespace nds32 {
namespace scheduling {

/* Classify the memory access direction.  The direction is unknown if
   the offset is not a constant value.  */
enum memory_access_direction
{
  MEM_ACCESS_DIR_POS,
  MEM_ACCESS_DIR_NEG,
  MEM_ACCESS_DIR_UNKNOWN
};

/* A safe wrapper for the function reg_overlap_mentioned_p ().  */
bool
reg_overlap_p (rtx x, rtx in)
{
  if (x == NULL_RTX || in == NULL_RTX)
    return false;

  return static_cast <bool> (reg_overlap_mentioned_p (x, in));
}
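
/* Example: reg_overlap_p ((reg:SI 2), (plus:SI (reg:SI 2) (const_int 4)))
   is true since the register is mentioned inside the address expression,
   while reg_overlap_p ((reg:SI 2), (reg:SI 3)) is false.  Unlike the
   wrapped function, passing NULL_RTX for either argument is safe and
   simply yields false.  */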

/* Determine the memory access direction of a load/store insn.  */
memory_access_direction
determine_access_direction (rtx_insn *insn)
{
  int post_update_rtx_index;
  rtx plus_rtx;
  rtx mem_rtx;
  rtx offset_rtx;

  switch (get_attr_type (insn))
    {
    case TYPE_LOAD_MULTIPLE:
      gcc_assert (parallel_elements (insn) >= 2);

      post_update_rtx_index = find_post_update_rtx (insn);
      if (post_update_rtx_index != -1)
	plus_rtx = SET_SRC (parallel_element (insn, post_update_rtx_index));
      else
	{
	  /* (parallel
	       [(set (reg) (mem (reg)))              : index 0
		(set (reg) (mem (plus (reg) (...)))) : index 1
		...])  */
	  mem_rtx = SET_SRC (parallel_element (insn, 1));
	  if (GET_CODE (mem_rtx) == UNSPEC)
	    mem_rtx = XVECEXP (mem_rtx, 0, 0);
	  gcc_assert (MEM_P (mem_rtx));
	  plus_rtx = XEXP (mem_rtx, 0);
	}
      break;

    case TYPE_STORE_MULTIPLE:
      gcc_assert (parallel_elements (insn) >= 2);

      post_update_rtx_index = find_post_update_rtx (insn);
      if (post_update_rtx_index != -1)
	plus_rtx = SET_SRC (parallel_element (insn, post_update_rtx_index));
      else
	{
	  /* (parallel
	       [(set (mem (reg)) (reg))              : index 0
		(set (mem (plus (reg) (...))) (reg)) : index 1
		...])  */
	  mem_rtx = SET_DEST (parallel_element (insn, 1));
	  if (GET_CODE (mem_rtx) == UNSPEC)
	    mem_rtx = XVECEXP (mem_rtx, 0, 0);
	  gcc_assert (MEM_P (mem_rtx));
	  plus_rtx = XEXP (mem_rtx, 0);
	}
      break;

    case TYPE_LOAD:
    case TYPE_STORE:
      mem_rtx = extract_mem_rtx (insn);

      switch (GET_CODE (XEXP (mem_rtx, 0)))
	{
	case POST_INC:
	  /* (mem (post_inc (...)))  */
	  return MEM_ACCESS_DIR_POS;

	case POST_DEC:
	  /* (mem (post_dec (...)))  */
	  return MEM_ACCESS_DIR_NEG;

	case PLUS:
	  /* (mem (plus (reg) (...)))  */
	  plus_rtx = XEXP (mem_rtx, 0);
	  break;

	case POST_MODIFY:
	  /* (mem (post_modify (reg) (plus (reg) (...))))  */
	  plus_rtx = XEXP (XEXP (mem_rtx, 0), 1);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  gcc_assert (GET_CODE (plus_rtx) == PLUS);

  offset_rtx = XEXP (plus_rtx, 1);
  if (GET_CODE (offset_rtx) == CONST_INT)
    {
      if (INTVAL (offset_rtx) < 0)
	return MEM_ACCESS_DIR_NEG;
      else
	return MEM_ACCESS_DIR_POS;
    }

  return MEM_ACCESS_DIR_UNKNOWN;
}

/* Return the nth load/store operation in the real micro-operation
   accessing order.  */
rtx
extract_nth_access_rtx (rtx_insn *insn, int n)
{
  int n_elems = parallel_elements (insn);
  int post_update_rtx_index = find_post_update_rtx (insn);
  memory_access_direction direction = determine_access_direction (insn);

  gcc_assert (direction != MEM_ACCESS_DIR_UNKNOWN);

  /* Reverse the order if the direction is negative.  */
  if (direction == MEM_ACCESS_DIR_NEG)
    n = -1 * n - 1;

  if (post_update_rtx_index != -1)
    {
      if (n >= 0 && post_update_rtx_index <= n)
	++n;
      else if (n < 0 && post_update_rtx_index >= n + n_elems)
	--n;
    }

  return parallel_element (insn, n);
}
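
/* Example of the index mapping above: when the access direction is
   negative, the nth micro-operation in real accessing order is the nth
   element counted from the end of the PARALLEL, which is what
   n = -1 * n - 1 encodes (0 -> -1, 1 -> -2, ...); parallel_element ()
   is expected to accept such negative indices.  If the PARALLEL also
   carries a base-update SET, the index is shifted one more step to
   skip that element.  */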

/* Returns the register operated on by the nth load/store operation in the
   real micro-operation accessing order.  This function assumes INSN must
   be a multiple-word load/store insn.  */
rtx
extract_nth_lmsw_access_reg (rtx_insn *insn, int n)
{
  rtx nth_rtx = extract_nth_access_rtx (insn, n);

  if (nth_rtx == NULL_RTX)
    return NULL_RTX;

  switch (get_attr_type (insn))
    {
    case TYPE_LOAD_MULTIPLE:
      return SET_DEST (nth_rtx);

    case TYPE_STORE_MULTIPLE:
      return SET_SRC (nth_rtx);

    default:
      gcc_unreachable ();
    }
}

/* Returns the register operated on by the nth load/store operation in the
   real micro-operation accessing order.  This function assumes INSN must
   be a double-word load/store insn.  */
rtx
extract_nth_ls2_access_reg (rtx_insn *insn, int n)
{
  rtx reg;
  machine_mode mode;

  if (post_update_insn_p (insn))
    {
      memory_access_direction direction = determine_access_direction (insn);
      gcc_assert (direction != MEM_ACCESS_DIR_UNKNOWN);

      /* Reverse the order if the direction is negative.  */
      if (direction == MEM_ACCESS_DIR_NEG)
	n = -1 * n - 1;
    }

  /* Handle the out-of-range case.  */
  if (n < -2 || n > 1)
    return NULL_RTX;

  /* Convert the index to a positive one.  */
  if (n < 0)
    n = 2 + n;

  switch (get_attr_type (insn))
    {
    case TYPE_LOAD:
      reg = SET_DEST (PATTERN (insn));
      break;

    case TYPE_STORE:
      reg = SET_SRC (PATTERN (insn));
      break;

    default:
      gcc_unreachable ();
    }

  gcc_assert (REG_P (reg) || GET_CODE (reg) == SUBREG);

  switch (GET_MODE (reg))
    {
    case E_DImode:
      mode = SImode;
      break;

    case E_DFmode:
      mode = SFmode;
      break;

    default:
      gcc_unreachable ();
    }

  if (n == 0)
    return gen_lowpart (mode, reg);
  else
    return gen_highpart (mode, reg);
}
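
/* Example: a double-word load writing a DImode register is performed as
   two SImode micro-operations, so index 0 maps to the low part
   (gen_lowpart) and index 1 to the high part (gen_highpart), while -1
   and -2 denote the same accesses counted from the end.  */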

/* Returns the register operated on by the nth load/store operation in the
   real micro-operation accessing order.  */
rtx
extract_nth_access_reg (rtx_insn *insn, int index)
{
  switch (GET_CODE (PATTERN (insn)))
    {
    case PARALLEL:
      return extract_nth_lmsw_access_reg (insn, index);

    case SET:
      return extract_nth_ls2_access_reg (insn, index);

    default:
      gcc_unreachable ();
    }
}

/* Determine if latency is incurred when the consumer PBSADA_INSN uses the
   value of DEF_REG in its Ra or Rb fields.  */
bool
pbsada_insn_ra_rb_dep_reg_p (rtx pbsada_insn, rtx def_reg)
{
  rtx unspec_rtx = SET_SRC (PATTERN (pbsada_insn));
  gcc_assert (GET_CODE (unspec_rtx) == UNSPEC);

  rtx pbsada_ra = XVECEXP (unspec_rtx, 0, 0);
  rtx pbsada_rb = XVECEXP (unspec_rtx, 0, 1);

  if (rtx_equal_p (def_reg, pbsada_ra)
      || rtx_equal_p (def_reg, pbsada_rb))
    return true;

  return false;
}

/* Determine if latency is incurred when the consumer PBSADA_INSN uses the
   value of DEF_REG in its Rt field.  */
bool
pbsada_insn_rt_dep_reg_p (rtx pbsada_insn, rtx def_reg)
{
  rtx pbsada_rt = SET_DEST (PATTERN (pbsada_insn));

  if (rtx_equal_p (def_reg, pbsada_rt))
    return true;

  return false;
}

/* Check if INSN is a movd44 insn consuming DEF_REG.  */
bool
movd44_even_dep_p (rtx_insn *insn, rtx def_reg)
{
  if (!movd44_insn_p (insn))
    return false;

  rtx use_rtx = SET_SRC (PATTERN (insn));

  if (REG_P (def_reg))
    {
      return rtx_equal_p (def_reg, use_rtx);
    }
  else if (GET_CODE (def_reg) == SUBREG
	   && GET_MODE (def_reg) == SImode
	   && rtx_equal_p (SUBREG_REG (def_reg), use_rtx))
    {
      if (TARGET_BIG_ENDIAN && SUBREG_BYTE (def_reg) == 4)
	return true;

      if (!TARGET_BIG_ENDIAN && SUBREG_BYTE (def_reg) == 0)
	return true;

      return false;
    }

  return false;
}

/* Check if INSN is a wext insn consuming DEF_REG.  */
bool
wext_odd_dep_p (rtx insn, rtx def_reg)
{
  rtx shift_rtx = XEXP (SET_SRC (PATTERN (insn)), 0);
  rtx use_reg = XEXP (shift_rtx, 0);
  rtx pos_rtx = XEXP (shift_rtx, 1);

  if (REG_P (pos_rtx) && reg_overlap_p (def_reg, pos_rtx))
    return true;

  if (GET_MODE (def_reg) == DImode)
    return reg_overlap_p (def_reg, use_reg);

  gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
  gcc_assert (REG_P (use_reg) || GET_CODE (use_reg) == SUBREG);

  if (REG_P (def_reg))
    {
      if (REG_P (use_reg))
	{
	  if (!TARGET_BIG_ENDIAN)
	    return REGNO (def_reg) == REGNO (use_reg) + 1;
	  else
	    return REGNO (def_reg) == REGNO (use_reg);
	}
      else
	return true;
    }

  if (GET_CODE (def_reg) == SUBREG)
    {
      if (!reg_overlap_p (def_reg, use_reg))
	return false;

      if (GET_CODE (use_reg) == SUBREG)
	return true;

      if (!TARGET_BIG_ENDIAN)
	return SUBREG_BYTE (def_reg) == 4;
      else
	return SUBREG_BYTE (def_reg) == 0;
    }

  return false;
}
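
/* wext extracts a word from a double-word source at a run-time bit
   position.  The checks above report a dependency when DEF_REG may feed
   the position operand, or when it may write the high word of the
   source pair: on little-endian targets that is the odd register
   (REGNO + 1, SUBREG_BYTE 4), on big-endian targets the even one
   (SUBREG_BYTE 0).  */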

/* Check if INSN is a bpick insn consuming DEF_REG.  */
bool
bpick_ra_rb_dep_p (rtx insn, rtx def_reg)
{
  rtx ior_rtx = SET_SRC (PATTERN (insn));
  rtx and1_rtx = XEXP (ior_rtx, 0);
  rtx and2_rtx = XEXP (ior_rtx, 1);
  rtx reg1_0 = XEXP (and1_rtx, 0);
  rtx reg1_1 = XEXP (and1_rtx, 1);
  rtx reg2_0 = XEXP (and2_rtx, 0);
  rtx reg2_1 = XEXP (and2_rtx, 1);

  if (GET_CODE (reg1_0) == NOT)
    {
      if (rtx_equal_p (reg1_0, reg2_0))
	return reg_overlap_p (def_reg, reg1_1)
	       || reg_overlap_p (def_reg, reg2_1);

      if (rtx_equal_p (reg1_0, reg2_1))
	return reg_overlap_p (def_reg, reg1_1)
	       || reg_overlap_p (def_reg, reg2_0);
    }

  if (GET_CODE (reg1_1) == NOT)
    {
      if (rtx_equal_p (reg1_1, reg2_0))
	return reg_overlap_p (def_reg, reg1_0)
	       || reg_overlap_p (def_reg, reg2_1);

      if (rtx_equal_p (reg1_1, reg2_1))
	return reg_overlap_p (def_reg, reg1_0)
	       || reg_overlap_p (def_reg, reg2_0);
    }

  if (GET_CODE (reg2_0) == NOT)
    {
      if (rtx_equal_p (reg2_0, reg1_0))
	return reg_overlap_p (def_reg, reg2_1)
	       || reg_overlap_p (def_reg, reg1_1);

      if (rtx_equal_p (reg2_0, reg1_1))
	return reg_overlap_p (def_reg, reg2_1)
	       || reg_overlap_p (def_reg, reg1_0);
    }

  if (GET_CODE (reg2_1) == NOT)
    {
      if (rtx_equal_p (reg2_1, reg1_0))
	return reg_overlap_p (def_reg, reg2_0)
	       || reg_overlap_p (def_reg, reg1_1);

      if (rtx_equal_p (reg2_1, reg1_1))
	return reg_overlap_p (def_reg, reg2_0)
	       || reg_overlap_p (def_reg, reg1_0);
    }

  gcc_unreachable ();
}
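
/* bpick performs a bitwise select between two data operands under a
   mask.  The cascade above scans (ior (and ...) (and ...)) for the
   NOT-wrapped mask operand shared by both AND terms, whichever operand
   slot it occupies, and reports a dependency when DEF_REG overlaps one
   of the two remaining data operands; any other shape is rejected via
   gcc_unreachable ().  */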

} // namespace scheduling
} // namespace nds32

/* ------------------------------------------------------------------------ */

using namespace nds32;
using namespace nds32::scheduling;

namespace { // anonymous namespace

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at II.  */
bool
n7_consumed_by_ii_dep_p (rtx_insn *consumer, rtx def_reg)
{
  rtx use_rtx;

  switch (get_attr_type (consumer))
    {
    /* MOVD44_E */
    case TYPE_ALU:
      if (movd44_even_dep_p (consumer, def_reg))
	return true;

      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_MUL:
      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_MAC:
      use_rtx = extract_mac_non_acc_rtx (consumer);
      break;

    /* Some special instructions, divmodsi4 and udivmodsi4, produce two
       results, the quotient and the remainder.  It requires two micro-
       operations in order to write two registers.  We have to check the
       dependency from the producer to the first micro-operation.  */
    case TYPE_DIV:
      if (divmod_p (consumer))
	use_rtx = SET_SRC (parallel_element (consumer, 0));
      else
	use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_LOAD:
      /* ADDR_IN_bi_Ra, ADDR_IN_!bi */
      if (post_update_insn_p (consumer))
	use_rtx = extract_base_reg (consumer);
      else
	use_rtx = extract_mem_rtx (consumer);
      break;

    case TYPE_STORE:
      /* ADDR_IN_bi_Ra, ADDR_IN_!bi */
      if (post_update_insn_p (consumer))
	use_rtx = extract_base_reg (consumer);
      else
	use_rtx = extract_mem_rtx (consumer);

      if (reg_overlap_p (def_reg, use_rtx))
	return true;

      /* ST_bi, ST_!bi_RI */
      if (!post_update_insn_p (consumer)
	  && !immed_offset_p (extract_mem_rtx (consumer)))
	return false;

      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_LOAD_MULTIPLE:
      use_rtx = extract_base_reg (consumer);
      break;

    case TYPE_STORE_MULTIPLE:
      /* ADDR_IN */
      use_rtx = extract_base_reg (consumer);
      if (reg_overlap_p (def_reg, use_rtx))
	return true;

      /* SMW (N, 1) */
      use_rtx = extract_nth_access_rtx (consumer, 0);
      break;

    case TYPE_BRANCH:
      use_rtx = PATTERN (consumer);
      break;

    default:
      gcc_unreachable ();
    }

  if (reg_overlap_p (def_reg, use_rtx))
    return true;

  return false;
}

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at AG (II).  */
bool
n8_consumed_by_addr_in_p (rtx_insn *consumer, rtx def_reg)
{
  rtx use_rtx;

  switch (get_attr_type (consumer))
    {
    case TYPE_BRANCH:
      use_rtx = extract_branch_target_rtx (consumer);
      break;

    case TYPE_LOAD:
      if (load_single_p (consumer))
	use_rtx = extract_mem_rtx (consumer);
      else
	use_rtx = extract_base_reg (consumer);
      break;

    case TYPE_STORE:
      if (store_single_p (consumer)
	  && (!post_update_insn_p (consumer)
	      || immed_offset_p (extract_mem_rtx (consumer))))
	use_rtx = extract_mem_rtx (consumer);
      else
	use_rtx = extract_base_reg (consumer);
      break;

    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      use_rtx = extract_base_reg (consumer);
      break;

    default:
      gcc_unreachable ();
    }

  return reg_overlap_p (def_reg, use_rtx);
}

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at EX.  */
bool
n8_consumed_by_ex_p (rtx_insn *consumer, rtx def_reg)
{
  rtx use_rtx;

  switch (get_attr_type (consumer))
    {
    case TYPE_ALU:
      if (movd44_even_dep_p (consumer, def_reg))
	return true;

      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_MUL:
      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_MAC:
      use_rtx = extract_mac_non_acc_rtx (consumer);
      break;

    /* Some special instructions, divmodsi4 and udivmodsi4, produce two
       results, the quotient and the remainder.  It requires two micro-
       operations in order to write two registers.  We have to check the
       dependency from the producer to the first micro-operation.  */
    case TYPE_DIV:
      if (divmod_p (consumer))
	use_rtx = SET_SRC (parallel_element (consumer, 0));
      else
	use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_BRANCH:
      use_rtx = extract_branch_condition_rtx (consumer);
      break;

    case TYPE_STORE:
      /* exclude ST_!bi_RR */
      if (!post_update_insn_p (consumer)
	  && !immed_offset_p (extract_mem_rtx (consumer)))
	return false;

      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_STORE_MULTIPLE:
      use_rtx = extract_nth_access_rtx (consumer, 0);
      break;

    default:
      gcc_unreachable ();
    }

  return reg_overlap_p (def_reg, use_rtx);
}
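
/* Note the two-step test for stores above: DEF_REG may be consumed as
   part of the address, e.g. the base register of
   (mem (plus (reg) (const_int 4))), or as the stored value itself (the
   SET_SRC).  Stores with a register offset and no base update
   (ST_!bi_RR) are filtered out before the stored-value test, since only
   the remaining forms consume their value at this stage.  */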

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at AG (II).  */
bool
e8_consumed_by_addr_in_p (rtx_insn *consumer, rtx def_reg)
{
  return n8_consumed_by_addr_in_p (consumer, def_reg);
}

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at EX.  */
bool
e8_consumed_by_ex_p (rtx_insn *consumer, rtx def_reg)
{
  rtx use_rtx;

  switch (get_attr_type (consumer))
    {
    case TYPE_ALU:
    case TYPE_STORE:
      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_MUL:
    case TYPE_MAC:
    case TYPE_DIV:
    case TYPE_BRANCH:
    case TYPE_STORE_MULTIPLE:
      return n8_consumed_by_ex_p (consumer, def_reg);

    default:
      gcc_unreachable ();
    }

  return reg_overlap_p (def_reg, use_rtx);
}

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at EX.  */
bool
n9_2r1w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
{
  rtx use_rtx;

  switch (get_attr_type (consumer))
    {
    case TYPE_ALU:
      if (movd44_even_dep_p (consumer, def_reg))
	return true;

      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_PBSAD:
    case TYPE_MUL:
      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_ALU_SHIFT:
      use_rtx = extract_shift_reg (consumer);
      break;

    case TYPE_PBSADA:
      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);

    case TYPE_MAC:
      use_rtx = PATTERN (consumer);
      break;

    case TYPE_DIV:
      if (divmod_p (consumer))
	use_rtx = SET_SRC (parallel_element (consumer, 0));
      else
	use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_MMU:
      if (GET_CODE (PATTERN (consumer)) == SET)
	use_rtx = SET_SRC (PATTERN (consumer));
      else
	return true;
      break;

    case TYPE_LOAD:
      /* ADDR_IN_bi_Ra, ADDR_IN_!bi */
      if (post_update_insn_p (consumer))
	use_rtx = extract_base_reg (consumer);
      else
	use_rtx = extract_mem_rtx (consumer);
      break;

    case TYPE_STORE:
      /* ADDR_IN_bi_Ra, ADDR_IN_!bi */
      if (post_update_insn_p (consumer))
	use_rtx = extract_base_reg (consumer);
      else
	use_rtx = extract_mem_rtx (consumer);

      if (reg_overlap_p (def_reg, use_rtx))
	return true;

      /* exclude ST_!bi_RR */
      if (!post_update_insn_p (consumer)
	  && !immed_offset_p (extract_mem_rtx (consumer)))
	return false;

      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_LOAD_MULTIPLE:
      use_rtx = extract_base_reg (consumer);
      break;

    case TYPE_STORE_MULTIPLE:
      /* ADDR_IN */
      use_rtx = extract_base_reg (consumer);
      if (reg_overlap_p (def_reg, use_rtx))
	return true;

      /* SMW (N, 1) */
      use_rtx = extract_nth_access_rtx (consumer, 0);
      break;

    case TYPE_BRANCH:
      use_rtx = PATTERN (consumer);
      break;

    default:
      gcc_unreachable ();
    }

  if (reg_overlap_p (def_reg, use_rtx))
    return true;

  return false;
}

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at EX.  */
bool
n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
{
  rtx use_rtx;

  switch (get_attr_type (consumer))
    {
    case TYPE_ALU:
    case TYPE_PBSAD:
    case TYPE_MUL:
      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_ALU_SHIFT:
      use_rtx = extract_shift_reg (consumer);
      break;

    case TYPE_PBSADA:
      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);

    case TYPE_MAC:
      use_rtx = extract_mac_non_acc_rtx (consumer);
      break;

    /* Some special instructions, divmodsi4 and udivmodsi4, produce two
       results, the quotient and the remainder.  In 2R1W configuration,
       it requires two micro-operations in order to write two registers.
       We have to check the dependency from the producer to the first
       micro-operation.  */
    case TYPE_DIV:
      if (divmod_p (consumer))
	use_rtx = SET_SRC (parallel_element (consumer, 0));
      else
	use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_MMU:
      if (GET_CODE (PATTERN (consumer)) == SET)
	use_rtx = SET_SRC (PATTERN (consumer));
      else
	return true;
      break;

    case TYPE_LOAD:
    case TYPE_STORE:
      use_rtx = extract_mem_rtx (consumer);
      break;

    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      use_rtx = extract_base_reg (consumer);
      break;

    case TYPE_BRANCH:
      use_rtx = PATTERN (consumer);
      break;

    default:
      gcc_unreachable ();
    }

  if (reg_overlap_p (def_reg, use_rtx))
    return true;

  return false;
}

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at EX.  */
bool
n10_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
{
  rtx use_rtx;

  switch (get_attr_type (consumer))
    {
    case TYPE_ALU:
    case TYPE_PBSAD:
    case TYPE_MUL:
    case TYPE_DALU:
    case TYPE_DALU64:
    case TYPE_DMUL:
    case TYPE_DPACK:
    case TYPE_DINSB:
    case TYPE_DCMP:
    case TYPE_DCLIP:
    case TYPE_DALUROUND:
      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_ALU_SHIFT:
      use_rtx = extract_shift_reg (consumer);
      break;

    case TYPE_PBSADA:
      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);

    case TYPE_MAC:
    case TYPE_DMAC:
      use_rtx = extract_mac_non_acc_rtx (consumer);
      break;

    /* Some special instructions, divmodsi4 and udivmodsi4, produce two
       results, the quotient and the remainder.  */
    case TYPE_DIV:
      if (divmod_p (consumer))
	use_rtx = SET_SRC (parallel_element (consumer, 0));
      else
	use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_DWEXT:
      return wext_odd_dep_p (consumer, def_reg);

    case TYPE_DBPICK:
      return bpick_ra_rb_dep_p (consumer, def_reg);

    case TYPE_MMU:
      if (GET_CODE (PATTERN (consumer)) == SET)
	use_rtx = SET_SRC (PATTERN (consumer));
      else
	return true;
      break;

    case TYPE_LOAD:
    case TYPE_STORE:
      use_rtx = extract_mem_rtx (consumer);
      break;

    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      use_rtx = extract_base_reg (consumer);
      break;

    case TYPE_BRANCH:
      use_rtx = PATTERN (consumer);
      break;

    default:
      gcc_unreachable ();
    }

  if (reg_overlap_p (def_reg, use_rtx))
    return true;

  return false;
}
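
/* The divmod_p () cases above handle insns whose PATTERN is a PARALLEL
   of two SETs, roughly of the shape

     (parallel [(set (reg Q) (div (reg A) (reg B)))
                (set (reg R) (mod (reg A) (reg B)))])

   so the input operands of the first micro-operation are reached via
   SET_SRC (parallel_element (consumer, 0)), and the remainder
   destination via SET_DEST (parallel_element (..., 1)).  */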

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at EX.  */
bool
gw_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
{
  rtx use_rtx;

  switch (get_attr_type (consumer))
    {
    case TYPE_ALU:
    case TYPE_PBSAD:
    case TYPE_MUL:
    case TYPE_DALU:
    case TYPE_DALU64:
    case TYPE_DMUL:
    case TYPE_DPACK:
    case TYPE_DINSB:
    case TYPE_DCMP:
    case TYPE_DCLIP:
    case TYPE_DALUROUND:
      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_ALU_SHIFT:
      use_rtx = extract_shift_reg (consumer);
      break;

    case TYPE_PBSADA:
      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);

    case TYPE_MAC:
    case TYPE_DMAC:
      use_rtx = extract_mac_non_acc_rtx (consumer);
      break;

    /* Some special instructions, divmodsi4 and udivmodsi4, produce two
       results, the quotient and the remainder.  We have to check the
       dependency from the producer to the first micro-operation.  */
    case TYPE_DIV:
      if (divmod_p (consumer))
	use_rtx = SET_SRC (parallel_element (consumer, 0));
      else
	use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_DWEXT:
      return wext_odd_dep_p (consumer, def_reg);

    case TYPE_DBPICK:
      return bpick_ra_rb_dep_p (consumer, def_reg);

    case TYPE_MMU:
      if (GET_CODE (PATTERN (consumer)) == SET)
	use_rtx = SET_SRC (PATTERN (consumer));
      else
	return true;
      break;

    case TYPE_LOAD:
    case TYPE_STORE:
      use_rtx = extract_mem_rtx (consumer);
      break;

    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      use_rtx = extract_base_reg (consumer);
      break;

    case TYPE_BRANCH:
      use_rtx = PATTERN (consumer);
      break;

    default:
      gcc_unreachable ();
    }

  if (reg_overlap_p (def_reg, use_rtx))
    return true;

  return false;
}

/* Check dependencies from any stages to ALU_E1 (E1).  This is a helper
   function of n13_consumed_by_e1_dep_p ().  */
bool
n13_alu_e1_insn_dep_reg_p (rtx_insn *alu_e1_insn, rtx def_reg)
{
  rtx unspec_rtx, operand_ra, operand_rb;
  rtx src_rtx, dst_rtx;

  switch (INSN_CODE (alu_e1_insn))
    {
    /* BSP and BSE are supported by built-in functions; the corresponding
       patterns are formed by UNSPEC RTXs.  We have to handle them
       individually.  */
    case CODE_FOR_unspec_bsp:
    case CODE_FOR_unspec_bse:
      unspec_rtx = SET_SRC (parallel_element (alu_e1_insn, 0));
      gcc_assert (GET_CODE (unspec_rtx) == UNSPEC);

      operand_ra = XVECEXP (unspec_rtx, 0, 0);
      operand_rb = XVECEXP (unspec_rtx, 0, 1);

      if (rtx_equal_p (def_reg, operand_ra)
	  || rtx_equal_p (def_reg, operand_rb))
	return true;

      return false;

    /* Unlike general ALU instructions, MOVD44 requires its operands
       at E1.  */
    case CODE_FOR_move_di:
    case CODE_FOR_move_df:
      src_rtx = SET_SRC (PATTERN (alu_e1_insn));
      dst_rtx = SET_DEST (PATTERN (alu_e1_insn));

      if (REG_P (dst_rtx) && REG_P (src_rtx)
	  && rtx_equal_p (src_rtx, def_reg))
	return true;

      return false;

    default:
      return false;
    }
}

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at E1.  Because the address generation unit is
   at E1, the address input should be ready at E1.  Note that a branch
   target is also a kind of address, so we have to check it as well.  */
bool
n13_consumed_by_e1_dep_p (rtx_insn *consumer, rtx def_reg)
{
  rtx use_rtx;

  switch (get_attr_type (consumer))
    {
    /* ALU_E1 */
    case TYPE_ALU:
      return n13_alu_e1_insn_dep_reg_p (consumer, def_reg);

    case TYPE_PBSADA:
      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);

    case TYPE_PBSAD:
    case TYPE_MUL:
      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_MAC:
      use_rtx = extract_mac_non_acc_rtx (consumer);
      break;

    case TYPE_DIV:
      if (divmod_p (consumer))
	use_rtx = SET_SRC (parallel_element (consumer, 0));
      else
	use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_MMU:
      if (GET_CODE (PATTERN (consumer)) == SET)
	use_rtx = SET_SRC (PATTERN (consumer));
      else
	return true;
      break;

    case TYPE_BRANCH:
      use_rtx = extract_branch_target_rtx (consumer);
      break;

    case TYPE_LOAD:
    case TYPE_STORE:
      use_rtx = extract_mem_rtx (consumer);
      break;

    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      use_rtx = extract_base_reg (consumer);
      break;

    default:
      return false;
    }

  if (reg_overlap_p (def_reg, use_rtx))
    return true;

  return false;
}

/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at E2.  */
bool
n13_consumed_by_e2_dep_p (rtx_insn *consumer, rtx def_reg)
{
  rtx use_rtx;

  switch (get_attr_type (consumer))
    {
    case TYPE_ALU:
    case TYPE_STORE:
      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_ALU_SHIFT:
      use_rtx = extract_shift_reg (consumer);
      break;

    case TYPE_PBSADA:
      return pbsada_insn_rt_dep_reg_p (consumer, def_reg);

    case TYPE_STORE_MULTIPLE:
      use_rtx = extract_nth_access_rtx (consumer, 0);
      break;

    case TYPE_BRANCH:
      use_rtx = extract_branch_condition_rtx (consumer);
      break;

    default:
      gcc_unreachable ();
    }

  if (reg_overlap_p (def_reg, use_rtx))
    return true;

  return false;
}
} // anonymous namespace

/* ------------------------------------------------------------------------ */

/* Guard functions for N7 core.  */

bool
nds32_n7_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  if (post_update_insn_p (producer))
    return false;

  rtx def_reg = SET_DEST (PATTERN (producer));

  return n7_consumed_by_ii_dep_p (consumer, def_reg);
}

bool
nds32_n7_last_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  /* If PRODUCER is a post-update LMW insn, the last micro-operation updates
     the base register and the result is ready in II stage, so we don't need
     to handle that case in this guard function and the corresponding bypass
     rule.  */
  if (post_update_insn_p (producer))
    return false;

  rtx last_def_reg = extract_nth_access_reg (producer, -1);

  if (last_def_reg == NULL_RTX)
    return false;

  gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG);

  return n7_consumed_by_ii_dep_p (consumer, last_def_reg);
}
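
/* These nds32_*_p predicates are referenced from the pipeline
   description files (e.g. nds32-n7.md) as the guard of a define_bypass.
   A sketch with purely illustrative reservation names -- not the actual
   ones used in the .md files:

     (define_bypass 2 "n7_load" "n7_ii_consumer" "nds32_n7_load_to_ii_p")

   The adjusted latency applies only to those producer/consumer pairs
   for which the guard returns true.  */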

/* Guard functions for N8 core.  */

bool
nds32_n8_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  if (post_update_insn_p (producer))
    return false;

  rtx def_reg = SET_DEST (PATTERN (producer));

  return n8_consumed_by_addr_in_p (consumer, def_reg);
}

bool
nds32_n8_load_bi_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  if (!post_update_insn_p (producer))
    return false;

  rtx def_reg = SET_DEST (PATTERN (producer));

  return n8_consumed_by_addr_in_p (consumer, def_reg);
}

bool
nds32_n8_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  if (post_update_insn_p (producer))
    return false;

  rtx def_reg = SET_DEST (PATTERN (producer));

  return n8_consumed_by_ex_p (consumer, def_reg);
}

bool
nds32_n8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx def_reg;

  switch (get_attr_type (producer))
    {
    case TYPE_ALU:
      if (movd44_insn_p (producer))
	def_reg = extract_movd44_odd_reg (producer);
      else
	def_reg = SET_DEST (PATTERN (producer));
      break;

    case TYPE_MUL:
    case TYPE_MAC:
      def_reg = SET_DEST (PATTERN (producer));
      break;

    case TYPE_DIV:
      if (divmod_p (producer))
	def_reg = SET_DEST (parallel_element (producer, 1));
      else
	def_reg = SET_DEST (PATTERN (producer));
      break;

    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      if (!post_update_insn_p (producer))
	return false;

      def_reg = extract_base_reg (producer);
      break;

    default:
      gcc_unreachable ();
    }

  return n8_consumed_by_addr_in_p (consumer, def_reg);
}

bool
nds32_n8_last_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  /* If PRODUCER is a post-update LMW insn, the last micro-operation updates
     the base register and the result is ready in EX stage, so we don't need
     to handle that case in this guard function and the corresponding bypass
     rule.  */
  if (post_update_insn_p (producer))
    return false;

  rtx last_def_reg = extract_nth_access_reg (producer, -1);

  if (last_def_reg == NULL_RTX)
    return false;

  gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG);

  return n8_consumed_by_addr_in_p (consumer, last_def_reg);
}

bool
nds32_n8_last_load_two_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  int index = -2;

  /* If PRODUCER is a post-update insn, an additional micro-operation is
     appended at the end, so the last memory access operation should be
     handled by this guard function and the corresponding bypass rule.  */
  if (post_update_insn_p (producer))
    index = -1;

  rtx last_two_def_reg = extract_nth_access_reg (producer, index);

  if (last_two_def_reg == NULL_RTX)
    return false;

  gcc_assert (REG_P (last_two_def_reg)
	      || GET_CODE (last_two_def_reg) == SUBREG);

  return n8_consumed_by_addr_in_p (consumer, last_two_def_reg);
}

bool
nds32_n8_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  /* If PRODUCER is a post-update LMW insn, the last micro-operation updates
     the base register and the result is ready in EX stage, so we don't need
     to handle that case in this guard function and the corresponding bypass
     rule.  */
  if (post_update_insn_p (producer))
    return false;

  rtx last_def_reg = extract_nth_access_reg (producer, -1);

  if (last_def_reg == NULL_RTX)
    return false;

  gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG);

  return n8_consumed_by_ex_p (consumer, last_def_reg);
}

/* Guard functions for E8 cores.  */

bool
nds32_e8_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx def_reg = SET_DEST (PATTERN (producer));

  return e8_consumed_by_addr_in_p (consumer, def_reg);
}

bool
nds32_e8_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx def_reg = SET_DEST (PATTERN (producer));

  return e8_consumed_by_ex_p (consumer, def_reg);
}

bool
nds32_e8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx def_reg;

  switch (get_attr_type (producer))
    {
    case TYPE_ALU:
      /* No data hazards if AGEN's input is produced by MOVI or SETHI.  */
      if (GET_CODE (PATTERN (producer)) == SET)
	{
	  rtx dest = SET_DEST (PATTERN (producer));
	  rtx src = SET_SRC (PATTERN (producer));

	  if ((REG_P (dest) || GET_CODE (dest) == SUBREG)
	      && (GET_CODE (src) == CONST_INT || GET_CODE (src) == HIGH))
	    return false;
	}

      def_reg = SET_DEST (PATTERN (producer));
      break;

    case TYPE_MUL:
    case TYPE_MAC:
      def_reg = SET_DEST (PATTERN (producer));
      break;

    case TYPE_DIV:
      if (divmod_p (producer))
	{
	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));

	  return (e8_consumed_by_addr_in_p (consumer, def_reg1)
		  || e8_consumed_by_addr_in_p (consumer, def_reg2));
	}

      def_reg = SET_DEST (PATTERN (producer));
      break;

    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      if (!post_update_insn_p (producer))
	return false;

      def_reg = extract_base_reg (producer);
      break;

    default:
      gcc_unreachable ();
    }

  return e8_consumed_by_addr_in_p (consumer, def_reg);
}

bool
nds32_e8_last_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx last_def_reg = extract_nth_access_reg (producer, -1);

  if (last_def_reg == NULL_RTX)
    return false;

  gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG);

  return e8_consumed_by_addr_in_p (consumer, last_def_reg);
}

bool
nds32_e8_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx last_def_reg = extract_nth_access_reg (producer, -1);

  if (last_def_reg == NULL_RTX)
    return false;

  gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG);

  return e8_consumed_by_ex_p (consumer, last_def_reg);
}
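
/* In nds32_e8_ex_to_ii_p above, ALU producers such as

     (set (reg:SI 0) (const_int 42))                 ; movi
     (set (reg:SI 0) (high:SI (symbol_ref:SI ("x")))) ; sethi

   are screened out, matching the comment that MOVI/SETHI results cause
   no data hazard on the address-generation input.  */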

/* Guard functions for N9 cores.  */

/* Check dependencies from MM to EX.  */
bool
nds32_n9_2r1w_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx def_reg;

  switch (get_attr_type (producer))
    {
    /* LD_!bi */
    case TYPE_LOAD:
      if (post_update_insn_p (producer))
	return false;

      def_reg = SET_DEST (PATTERN (producer));
      break;

    case TYPE_MUL:
    case TYPE_MAC:
      def_reg = SET_DEST (PATTERN (producer));
      break;

    default:
      gcc_unreachable ();
    }

  return n9_2r1w_consumed_by_ex_dep_p (consumer, def_reg);
}

/* Check dependencies from MM to EX.  */
bool
nds32_n9_3r2w_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx def_reg;

  switch (get_attr_type (producer))
    {
    case TYPE_LOAD:
    case TYPE_MUL:
    case TYPE_MAC:
      def_reg = SET_DEST (PATTERN (producer));
      break;

    /* Some special instructions, divmodsi4 and udivmodsi4, produce two
       results, the quotient and the remainder.  We have to handle them
       individually.  */
    case TYPE_DIV:
      if (divmod_p (producer))
	{
	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));

	  return (n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg1)
		  || n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg2));
	}

      def_reg = SET_DEST (PATTERN (producer));
      break;

    default:
      gcc_unreachable ();
    }

  return n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg);
}

/* Check dependencies from LMW(N, N) to EX.  */
bool
nds32_n9_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx last_def_reg = extract_nth_access_reg (producer, -1);

  if (nds32_register_ports_config == REG_PORT_2R1W)
    {
      /* The base-update micro operation occupies the last cycle.  */
      if (post_update_insn_p (producer))
	return false;

      /* When the base register is in the list of a load multiple insn and
	 the access order of the base register is not the last one, we need
	 an additional micro operation to commit the load result to the base
	 register -- we can treat the base register as the last defined
	 register.  */
      size_t i;
      size_t n_elems = parallel_elements (producer);
      rtx base_reg = extract_base_reg (producer);

      for (i = 0; i < n_elems; ++i)
	{
	  rtx load_rtx = extract_nth_access_rtx (producer, i);
	  rtx list_element = SET_DEST (load_rtx);

	  if (rtx_equal_p (base_reg, list_element) && i != n_elems - 1)
	    {
	      last_def_reg = base_reg;
	      break;
	    }
	}

      return n9_2r1w_consumed_by_ex_dep_p (consumer, last_def_reg);
    }
  else
    return n9_3r2w_consumed_by_ex_dep_p (consumer, last_def_reg);
}
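
/* Example for the 2R1W path above: for an LMW that loads { $r1, $r2,
   $r3 } with base register $r2, the base register appears in the list
   at a non-final position, so the extra committing micro-operation
   makes $r2 behave as the last defined register for this bypass
   check.  */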

/* Guard functions for N10 cores.  */

/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN).  */
bool
nds32_n10_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  gcc_assert (get_attr_type (producer) == TYPE_FLOAD
	      || get_attr_type (producer) == TYPE_FSTORE);
  gcc_assert (get_attr_type (consumer) == TYPE_FLOAD
	      || get_attr_type (consumer) == TYPE_FSTORE);

  if (!post_update_insn_p (producer))
    return false;

  return reg_overlap_p (extract_base_reg (producer),
			extract_mem_rtx (consumer));
}

/* Check dependencies from MM to EX.  */
bool
nds32_n10_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx def_reg;

  switch (get_attr_type (producer))
    {
    case TYPE_LOAD:
    case TYPE_MUL:
    case TYPE_MAC:
    case TYPE_DALU64:
    case TYPE_DMUL:
    case TYPE_DMAC:
    case TYPE_DALUROUND:
    case TYPE_DBPICK:
    case TYPE_DWEXT:
      def_reg = SET_DEST (PATTERN (producer));
      break;

    /* Some special instructions, divmodsi4 and udivmodsi4, produce two
       results, the quotient and the remainder.  We have to handle them
       individually.  */
    case TYPE_DIV:
      if (divmod_p (producer))
	{
	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));

	  return (n10_consumed_by_ex_dep_p (consumer, def_reg1)
		  || n10_consumed_by_ex_dep_p (consumer, def_reg2));
	}

      def_reg = SET_DEST (PATTERN (producer));
      break;

    default:
      gcc_unreachable ();
    }

  return n10_consumed_by_ex_dep_p (consumer, def_reg);
}

/* Check dependencies from LMW(N, N) to EX.  */
bool
nds32_n10_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx last_def_reg = extract_nth_access_reg (producer, -1);

  return n10_consumed_by_ex_dep_p (consumer, last_def_reg);
}

/* Guard functions for Graywolf cores.  */

/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN).  */
bool
nds32_gw_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  return nds32_n10_ex_to_ex_p (producer, consumer);
}

/* Check dependencies from MM to EX.  */
bool
nds32_gw_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx def_reg;

  switch (get_attr_type (producer))
    {
    case TYPE_LOAD:
    case TYPE_MUL:
    case TYPE_MAC:
    case TYPE_DALU64:
    case TYPE_DMUL:
    case TYPE_DMAC:
    case TYPE_DALUROUND:
    case TYPE_DBPICK:
    case TYPE_DWEXT:
      def_reg = SET_DEST (PATTERN (producer));
      break;

    /* Some special instructions, divmodsi4 and udivmodsi4, produce two
       results, the quotient and the remainder.  We have to handle them
       individually.  */
    case TYPE_DIV:
      if (divmod_p (producer))
	{
	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));

	  return (gw_consumed_by_ex_dep_p (consumer, def_reg1)
		  || gw_consumed_by_ex_dep_p (consumer, def_reg2));
	}

      def_reg = SET_DEST (PATTERN (producer));
      break;

    default:
      gcc_unreachable ();
    }

  return gw_consumed_by_ex_dep_p (consumer, def_reg);
}

/* Check dependencies from LMW(N, N) to EX.  */
bool
nds32_gw_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx last_def_reg = extract_nth_access_reg (producer, -1);

  return gw_consumed_by_ex_dep_p (consumer, last_def_reg);
}
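
/* The *_ex_to_ex_p guards above model the ADDR_OUT -> ADDR_IN path: a
   post-update floating-point load/store whose updated base register
   immediately serves as the address input of the next floating-point
   memory access.  */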

/* Guard functions for N12/N13 cores.  */

/* Check dependencies from E2 to E1.  */
bool
nds32_n13_e2_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx def_reg;

  switch (get_attr_type (producer))
    {
    /* Only post-update load/store instructions are considered.  These
       instructions produce address output at E2.  */
    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      if (!post_update_insn_p (producer))
	return false;

      def_reg = extract_base_reg (producer);
      break;

    case TYPE_ALU:
    case TYPE_ALU_SHIFT:
    case TYPE_PBSAD:
    case TYPE_PBSADA:
    case TYPE_MUL:
    case TYPE_MAC:
      def_reg = SET_DEST (PATTERN (producer));
      break;

    case TYPE_BRANCH:
      return true;

    case TYPE_DIV:
      /* Some special instructions, divmodsi4 and udivmodsi4, produce two
	 results, the quotient and the remainder.  We have to handle them
	 individually.  */
      if (divmod_p (producer))
	{
	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));

	  return (n13_consumed_by_e1_dep_p (consumer, def_reg1)
		  || n13_consumed_by_e1_dep_p (consumer, def_reg2));
	}

      def_reg = SET_DEST (PATTERN (producer));
      break;

    default:
      gcc_unreachable ();
    }

  return n13_consumed_by_e1_dep_p (consumer, def_reg);
}

/* Check dependencies from Load-Store Unit (E3) to E1.  */
bool
nds32_n13_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx def_reg = SET_DEST (PATTERN (producer));

  gcc_assert (get_attr_type (producer) == TYPE_LOAD);
  gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);

  return n13_consumed_by_e1_dep_p (consumer, def_reg);
}

/* Check dependencies from Load-Store Unit (E3) to E2.  */
bool
nds32_n13_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx def_reg = SET_DEST (PATTERN (producer));

  gcc_assert (get_attr_type (producer) == TYPE_LOAD);
  gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);

  return n13_consumed_by_e2_dep_p (consumer, def_reg);
}

/* Check dependencies from LMW(N, N) to E1.  */
bool
nds32_n13_last_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx last_def_reg = extract_nth_access_reg (producer, -1);

  return n13_consumed_by_e1_dep_p (consumer, last_def_reg);
}

/* Check dependencies from LMW(N, N) to E2.  */
bool
nds32_n13_last_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx last_def_reg = extract_nth_access_reg (producer, -1);

  return n13_consumed_by_e2_dep_p (consumer, last_def_reg);
}

/* Check dependencies from LMW(N, N-1) to E1.  */
bool
nds32_n13_last_two_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx last_two_def_reg = extract_nth_access_reg (producer, -2);

  if (last_two_def_reg == NULL_RTX)
    return false;

  return n13_consumed_by_e1_dep_p (consumer, last_two_def_reg);
}
/* ------------------------------------------------------------------------ */