;; ARM 926EJ-S Pipeline Description
;; Copyright (C) 2003-2013 Free Software Foundation, Inc.
;; Written by CodeSourcery, LLC.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; These descriptions are based on the information contained in the
;; ARM926EJ-S Technical Reference Manual, Copyright (c) 2002 ARM
;; Limited.
;;

;; This automaton provides a pipeline description for the ARM
;; 926EJ-S core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.

;; Every unit and reservation in this file belongs to this one automaton.
(define_automaton "arm926ejs")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; There is a single pipeline.
;;
;;   The ALU pipeline has fetch, decode, execute, memory, and
;;   write stages.  We only need to model the execute, memory and write
;;   stages.

;; One unit per modeled stage: e (execute), m (memory), w (write).
(define_cpu_unit "e,m,w" "arm926ejs")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; ALU instructions require three cycles to execute, and use the ALU
;; pipeline in each of the three stages.
;; The results are available after the execute stage has finished.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles.  That case is not modeled here.

;; ALU operations with no shift-by-register operand.  Note that the
;; type list below includes the alu_shift and simple_alu_shift types;
;; only alu_shift_reg gets its own reservation.
;; Latency 1; the insn occupies e, m and w in successive cycles.
(define_insn_reservation "9_alu_op" 1
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift"))
  "e,m,w")

;; ALU operations with a shift-by-register operand.
;; These really stall in the decoder, in order to read the shift value
;; in a second cycle.  Pretend instead that we take two cycles in the
;; execute stage: latency 2, with e held for two cycles before m and w.
(define_insn_reservation "9_alu_shift_reg_op" 2
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "alu_shift_reg"))
  "e*2,m,w")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Multiplication instructions loop in the execute stage until the
;; instruction has been passed through the multiplier array enough
;; times.
;; Multiply operations occur in both the execute and memory
;; stages of the pipeline.
;;
;; Each reservation below holds the execute unit for one cycle fewer
;; than its latency, then passes through m and w.

;; mul, mla: latency 3, two cycles in execute.
;; "smlalxy" is deliberately not listed here: it has its own
;; reservation, "9_mult6", with the same latency and unit usage, and
;; listing it in both places made the two conditions overlap.
(define_insn_reservation "9_mult1" 3
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "insn" "mul,mla"))
  "e*2,m,w")

;; muls, mlas: latency 4, three cycles in execute.
(define_insn_reservation "9_mult2" 4
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "insn" "muls,mlas"))
  "e*3,m,w")

;; umull, umlal, smull, smlal: latency 4, three cycles in execute.
(define_insn_reservation "9_mult3" 4
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "insn" "umull,umlal,smull,smlal"))
  "e*3,m,w")

;; umulls, umlals, smulls, smlals: latency 5, four cycles in execute.
(define_insn_reservation "9_mult4" 5
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "insn" "umulls,umlals,smulls,smlals"))
  "e*4,m,w")

;; smulxy, smlaxy, smlawx: latency 2, one cycle in execute.
(define_insn_reservation "9_mult5" 2
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "insn" "smulxy,smlaxy,smlawx"))
  "e,m,w")

;; smlalxy: latency 3, two cycles in execute.
(define_insn_reservation "9_mult6" 3
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "insn" "smlalxy"))
  "e*2,m,w")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!").  These models assume that all memory references
;; hit in dcache.

;; Loads with a shifted offset take 3 cycles, and are (a) probably the
;; most common and (b) the pessimistic assumption will lead to fewer stalls.
;; Single-word and byte loads: latency 3, two cycles in execute.
;; All single loads are modeled with the 3-cycle timing of a load with
;; a shifted offset; that form is probably the most common, and the
;; pessimistic assumption leads to fewer stalls.
(define_insn_reservation "9_load1_op" 3
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "load1,load_byte"))
  "e*2,m,w")

;; Single-word stores: latency 0, one cycle in each of e, m and w.
(define_insn_reservation "9_store1_op" 0
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "store1"))
  "e,m,w")

;; Multiple word loads and stores.
;; An N-word transfer holds the memory unit for N cycles.  Loads have
;; latency N+1; stores have latency 0.

(define_insn_reservation "9_load2_op" 3
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "load2"))
  "e,m*2,w")

(define_insn_reservation "9_load3_op" 4
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "load3"))
  "e,m*3,w")

(define_insn_reservation "9_load4_op" 5
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "load4"))
  "e,m*4,w")

(define_insn_reservation "9_store2_op" 0
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "store2"))
  "e,m*2,w")

(define_insn_reservation "9_store3_op" 0
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "store3"))
  "e,m*3,w")

(define_insn_reservation "9_store4_op" 0
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "store4"))
  "e,m*4,w")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Branch instructions are difficult to model accurately.  The ARM
;; core can predict most branches.  If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream.  Some branches can
;; therefore appear to require zero cycles to execute.  We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.

;; Branches: latency 0, no pipeline units reserved.
(define_insn_reservation "9_branch_op" 0
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "branch"))
  "nothing")

;; The latency for a call is not predictable.  Therefore, we use 32 as
;; roughly equivalent to positive infinity.

;; Calls reserve no pipeline units ("nothing"); the latency of 32
;; serves as a very large value for scheduling purposes.
(define_insn_reservation "9_call_op" 32
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "call"))
  "nothing")