;; Faraday FMP626 Pipeline Description
;; Copyright (C) 2010-2019 Free Software Foundation, Inc.
;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; These descriptions are based on the information contained in the
;; FMP626 Core Design Note, Copyright (c) 2010 Faraday Technology Corp.

;; Pipeline architecture
;;    S        E        M        W(Q1)    Q2
;;   ___________________________________________
;;    shifter  alu
;;    mul1     mul2     mul3
;;    ld/st1   ld/st2   ld/st3   ld/st4   ld/st5

;; This automaton provides a pipeline description for the Faraday
;; FMP626 core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.

;; The automaton to which the fmp626_core unit is assigned.
(define_automaton "fmp626")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; There is a single pipeline.
;;
;;   The ALU pipeline has fetch, decode, execute, memory, and
;;   write stages.  We only need to model the execute, memory and write
;;   stages.
;; The core is modelled as one functional unit in the "fmp626"
;; automaton.  Each reservation below occupies that unit for one or
;; more consecutive cycles.
(define_cpu_unit "fmp626_core" "fmp626")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Plain ALU operations get a result latency of one cycle and the
;; shifted-operand forms a latency of two; both occupy fmp626_core
;; for a single cycle.  The results are available after the execute
;; stage has finished.
;;
;; NOTE(review): the earlier commentary here said that ALU
;; instructions "require two cycles to execute, and use the ALU
;; pipeline in each of the three stages", which does not match the
;; latency of 1 on mp626_alu_op -- confirm against the FMP626 Core
;; Design Note.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles.  That case is not modeled here.

;; ALU operations without a shifted operand.
(define_insn_reservation "mp626_alu_op" 1
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "alu_imm,alus_imm,alu_sreg,alus_sreg,\
                        logic_imm,logics_imm,logic_reg,logics_reg,\
                        adc_imm,adcs_imm,adc_reg,adcs_reg,\
                        adr,bfm,rev,\
                        shift_imm,shift_reg,\
                        mov_imm,mov_reg,mvn_imm,mvn_reg"))
  "fmp626_core")

;; ALU, logic, move and extend operations that use a shifted operand.
(define_insn_reservation "mp626_alu_shift_op" 2
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "alu_shift_imm,logic_shift_imm,alus_shift_imm,logics_shift_imm,\
                        alu_shift_reg,logic_shift_reg,alus_shift_reg,logics_shift_reg,\
                        extend,\
                        mov_shift,mov_shift_reg,\
                        mvn_shift,mvn_shift_reg"))
  "fmp626_core")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The four multiply classes differ in result latency (2, 2, 3, 4)
;; and in how many cycles they hold fmp626_core (1, 1, 2, 3).

;; smulwy/smlawy/smulxy/smlaxy: latency 2, core busy for one cycle.
(define_insn_reservation "mp626_mult1" 2
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy"))
  "fmp626_core")

;; mul/mla: latency 2, core busy for one cycle.
(define_insn_reservation "mp626_mult2" 2
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "mul,mla"))
  "fmp626_core")

;; Flag-setting mul/mla, the non-flag-setting long multiplies,
;; smlalxy and smlawx: latency 3, core busy for two cycles.
(define_insn_reservation "mp626_mult3" 3
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
  "fmp626_core*2")

;; Flag-setting long multiplies: latency 4, core busy for three cycles.
(define_insn_reservation "mp626_mult4" 4
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "smulls,smlals,umulls,umlals"))
  "fmp626_core*3")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!").  These models assume that all memory references
;; hit in dcache.

;; Loads: latency 5, 6 or 7, holding fmp626_core for 1, 2 or 3
;; cycles, by load type.
(define_insn_reservation "mp626_load1_op" 5
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "load_4,load_byte"))
  "fmp626_core")

(define_insn_reservation "mp626_load2_op" 6
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "load_8,load_12"))
  "fmp626_core*2")

(define_insn_reservation "mp626_load3_op" 7
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "load_16"))
  "fmp626_core*3")

;; Stores: latency 0, 1 or 2, holding fmp626_core for 1, 2 or 3
;; cycles, by store type.
(define_insn_reservation "mp626_store1_op" 0
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "store_4"))
  "fmp626_core")

(define_insn_reservation "mp626_store2_op" 1
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "store_8,store_12"))
  "fmp626_core*2")

(define_insn_reservation "mp626_store3_op" 2
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "store_16"))
  "fmp626_core*3")

;; Bypasses.  Each one overrides the producer's default latency for
;; the listed consumers.  The guard functions named below are defined
;; outside this file.
;; NOTE(review): judging by their names, the guards accept the bypass
;; only when the consumer does not need the value early (for a store
;; address or a shifted operand) -- confirm against their definitions.

;; Producer -> store, guarded by arm_no_early_store_addr_dep.
(define_bypass 1
  "mp626_load1_op,mp626_load2_op,mp626_load3_op"
  "mp626_store1_op,mp626_store2_op,mp626_store3_op"
  "arm_no_early_store_addr_dep")
(define_bypass 1
  "mp626_alu_op,mp626_alu_shift_op,mp626_mult1,mp626_mult2,\
   mp626_mult3,mp626_mult4"
  "mp626_store1_op"
  "arm_no_early_store_addr_dep")

;; Shifted-operand ALU result -> ALU consumers.
(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_op")
(define_bypass 1
  "mp626_alu_shift_op"
  "mp626_alu_shift_op"
  "arm_no_early_alu_shift_dep")

;; Multiply result -> shifted-operand ALU consumer, guarded by
;; arm_no_early_alu_shift_dep.
(define_bypass 1
  "mp626_mult1,mp626_mult2"
  "mp626_alu_shift_op"
  "arm_no_early_alu_shift_dep")
(define_bypass 2
  "mp626_mult3"
  "mp626_alu_shift_op"
  "arm_no_early_alu_shift_dep")
(define_bypass 3
  "mp626_mult4"
  "mp626_alu_shift_op"
  "arm_no_early_alu_shift_dep")

;; Multiply result -> plain ALU consumer (unguarded).
(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_op")
(define_bypass 2 "mp626_mult3" "mp626_alu_op")
(define_bypass 3 "mp626_mult4" "mp626_alu_op")

;; Load result -> plain ALU consumer (unguarded).
(define_bypass 4 "mp626_load1_op" "mp626_alu_op")
(define_bypass 5 "mp626_load2_op" "mp626_alu_op")
(define_bypass 6 "mp626_load3_op" "mp626_alu_op")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Branch instructions are difficult to model accurately.  The FMP626
;; core can predict most branches.  If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream.  Some branches can
;; therefore appear to require zero cycles to execute.  We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.

(define_insn_reservation "mp626_branch_op" 0
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "branch"))
  "fmp626_core")

;; The latency for a call is actually the latency when the result is
;; available, i.e. R0 ready for an int return value.
(define_insn_reservation "mp626_call_op" 1
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "call"))
  "fmp626_core")
