;; Copyright (C) 2007-2020 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;;
;; .........................
;;
;; DFA-based pipeline description for MIPS64 model R20Kc.
;; Contributed by Jason Eckhardt (jle@cygnus.com).
;;
;; The R20Kc is a dual-issue processor that can generally bundle
;; instructions as follows:
;;   1. integer with integer
;;   2. integer with fp
;;   3. fp with fpload/fpstore
;;
;; Of course, there are various restrictions.
;; Reference:
;;   "Ruby (R20K) Technical Specification Rev. 1.2, December 28, 1999."
;;
;; .........................

;; Three automata are declared so that the long-latency operations are
;; isolated from everything else, which reduces space.
(define_automaton "r20kc_other, r20kc_fdiv, r20kc_idiv")

;;
;; Describe the resources.
;;

;; Global: the two issue units, both in the r20kc_other automaton.
(define_cpu_unit "r20kc_iss0, r20kc_iss1" "r20kc_other")

;; Integer execution unit (pipeline A).
(define_cpu_unit "r20kc_ixua_addsub_agen" "r20kc_other")
(define_cpu_unit "r20kc_ixua_shift" "r20kc_other")

;; The two pipeline-A units can never be reserved in the same cycle.
(exclusion_set "r20kc_ixua_addsub_agen" "r20kc_ixua_shift")

;; Integer execution unit (pipeline B).
(define_cpu_unit "r20kc_ixub_addsub" "r20kc_other")
(define_cpu_unit "r20kc_ixub_branch" "r20kc_other")
(define_cpu_unit "r20kc_ixub_mpydiv" "r20kc_other")
;; The multiply/divide iteration unit is the only unit placed in the
;; r20kc_idiv automaton.
(define_cpu_unit "r20kc_ixub_mpydiv_iter" "r20kc_idiv")

;; The add/sub, branch and multiply/divide units of pipeline B are
;; pairwise mutually exclusive.
(exclusion_set "r20kc_ixub_addsub" "r20kc_ixub_branch, r20kc_ixub_mpydiv")
(exclusion_set "r20kc_ixub_branch" "r20kc_ixub_mpydiv")

;; Cache / memory interface.
(define_cpu_unit "r20kc_cache" "r20kc_other")

;; Floating-point unit.  The two *_iter units are placed in the
;; r20kc_fdiv automaton; the remaining units are in r20kc_other.
(define_cpu_unit "r20kc_fpu_add" "r20kc_other")
(define_cpu_unit "r20kc_fpu_mpy" "r20kc_other")
(define_cpu_unit "r20kc_fpu_mpy_iter" "r20kc_fdiv")
(define_cpu_unit "r20kc_fpu_divsqrt" "r20kc_other")
(define_cpu_unit "r20kc_fpu_divsqrt_iter" "r20kc_fdiv")

;; The add, multiply and divide/sqrt units are pairwise mutually
;; exclusive.
(exclusion_set "r20kc_fpu_add" "r20kc_fpu_mpy, r20kc_fpu_divsqrt")
(exclusion_set "r20kc_fpu_mpy" "r20kc_fpu_divsqrt")

;; No insn can be issued after a branch: neither issue unit may be
;; reserved while the branch unit is reserved.
(absence_set "r20kc_iss0,r20kc_iss1" "r20kc_ixub_branch")

;;
;; Define reservations for unit name mnemonics or combinations.
;;

;; Either one of the two issue units.
(define_reservation "r20kc_iss"
  "r20kc_iss0|r20kc_iss1")

;; Both issue units at once, so nothing else can issue in that cycle.
(define_reservation "r20kc_single_dispatch"
  "r20kc_iss0+r20kc_iss1")

;; Integer add/sub may use the add/sub unit of either pipeline.
(define_reservation "r20kc_iaddsub"
  "r20kc_iss+(r20kc_ixua_addsub_agen|r20kc_ixub_addsub)")

(define_reservation "r20kc_ishift"
  "r20kc_iss+r20kc_ixua_shift")

(define_reservation "r20kc_fpmove"
  "r20kc_iss+r20kc_ixua_addsub_agen")

;; Memory accesses take an issue unit, the pipeline-A add/sub/agen unit
;; and the cache, all in the same cycle.
(define_reservation "r20kc_imem"
  "r20kc_iss+r20kc_ixua_addsub_agen+r20kc_cache")

(define_reservation "r20kc_icache"
  "r20kc_cache")

(define_reservation "r20kc_impydiv"
  "r20kc_iss+r20kc_ixub_mpydiv")

(define_reservation "r20kc_impydiv_iter"
  "r20kc_ixub_mpydiv_iter")

(define_reservation "r20kc_ibranch"
  "r20kc_iss+r20kc_ixub_branch")

(define_reservation "r20kc_fpadd"
  "r20kc_iss+r20kc_fpu_add")

(define_reservation "r20kc_fpmpy"
  "r20kc_iss+r20kc_fpu_mpy")

(define_reservation "r20kc_fpmpy_iter"
  "r20kc_fpu_mpy_iter")

(define_reservation "r20kc_fpdivsqrt"
  "r20kc_iss+r20kc_fpu_divsqrt")

(define_reservation "r20kc_fpdivsqrt_iter"
  "r20kc_fpu_divsqrt_iter")

;;
;; Describe instruction reservations for integer operations.
;;

;; Conditional moves always force single-dispatch.  The non-FP form
;; has a latency of 1, the SF/DF form a latency of 4.
(define_insn_reservation "r20kc_cond_move_int" 1
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "condmove")
            (eq_attr "mode" "!SF,DF")))
  "r20kc_single_dispatch")

(define_insn_reservation "r20kc_cond_move_fp" 4
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "condmove")
            (eq_attr "mode" "SF,DF")))
  "r20kc_single_dispatch")

;; Simple single-cycle integer operations.
(define_insn_reservation "r20kc_int_other" 1
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "move,arith,const,nop"))
  "r20kc_iaddsub")

;; Shifts can only execute on ixu pipeline A.
(define_insn_reservation "r20kc_int_shift" 1
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "shift"))
  "r20kc_ishift")

;; Integer loads and prefetches: latency 2.
(define_insn_reservation "r20kc_ld" 2
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "load,prefetch,prefetchx"))
  "r20kc_imem")


;; A load immediately following a store will stall, so
;; say that a store uses the cache for an extra cycle.
(define_insn_reservation "r20kc_st" 2
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "store"))
  "r20kc_imem,r20kc_icache")

;; Floating-point loads: latency 3.
(define_insn_reservation "r20kc_fld" 3
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "fpload"))
  "r20kc_imem")

;; Floating-point stores keep the cache reserved for two cycles after
;; the issue cycle.
(define_insn_reservation "r20kc_ffst" 3
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "fpstore"))
  "r20kc_imem,r20kc_icache*2")

;; Integer divide latency is between 13 and 42 cycles for DIV[U] and between
;; 13 and 72 cycles for DDIV[U]. This depends on the value of the inputs
;; so we just choose the worst case latency.
(define_insn_reservation "r20kc_idiv_si" 42
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "idiv")
            (eq_attr "mode" "SI")))
  "r20kc_impydiv+(r20kc_impydiv_iter*42)")

(define_insn_reservation "r20kc_idiv_di" 72
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "idiv")
            (eq_attr "mode" "DI")))
  "r20kc_impydiv+(r20kc_impydiv_iter*72)")

;; Integer multiply latency is 4 or 7 cycles for word and double-word
;; respectively.
(define_insn_reservation "r20kc_impy_si" 4
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "imadd,imul,imul3")
            (eq_attr "mode" "SI")))
  "r20kc_impydiv+(r20kc_impydiv_iter*2)")

(define_insn_reservation "r20kc_impy_di" 7
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "imadd,imul,imul3")
            (eq_attr "mode" "DI")))
  "r20kc_impydiv+(r20kc_impydiv_iter*7)")

;; Move to/from HI/LO.
;; Moving to HI/LO has a 3 cycle latency while moving from only has a 1
;; cycle latency. Repeat rate is 3 for both.
(define_insn_reservation "r20kc_imthilo" 3
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "mthi,mtlo"))
  "r20kc_impydiv+(r20kc_impydiv_iter*3)")

(define_insn_reservation "r20kc_imfhilo" 1
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "mfhi,mflo"))
  "r20kc_impydiv+(r20kc_impydiv_iter*3)")

;; Move to fp coprocessor: latency 3.
(define_insn_reservation "r20kc_ixfer_mt" 3
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "mtc"))
  "r20kc_fpmove")

;; Move from fp coprocessor: latency 2.
(define_insn_reservation "r20kc_ixfer_mf" 2
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "mfc"))
  "r20kc_fpmove")

;; Assume branch predicted correctly.
(define_insn_reservation "r20kc_ibr" 1
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "branch,jump,call"))
  "r20kc_ibranch")

;;
;; Describe instruction reservations for the floating-point operations.
;;

;; Simple FP operations: latency 4 on the FP add unit.
(define_insn_reservation "r20kc_fp_other" 4
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "fmove,fadd,fabs,fneg,fcmp"))
  "r20kc_fpadd")

;; Conversions are split by cnv_mode into a 4-cycle and a 5-cycle group,
;; both on the FP add unit.
(define_insn_reservation "r20kc_fp_cvt_a" 4
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "fcvt")
            (eq_attr "cnv_mode" "I2S,I2D,S2D")))
  "r20kc_fpadd")

;; D2I is listed here so that double-to-integer conversions do not fall
;; through both conversion reservations and end up with no 20Kc
;; reservation at all.
;; NOTE(review): the 5-cycle latency for D2I is assumed by analogy with
;; S2I -- confirm against the R20K specification.
(define_insn_reservation "r20kc_fp_cvt_b" 5
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "fcvt")
            (eq_attr "cnv_mode" "D2S,S2I,D2I")))
  "r20kc_fpadd")

;; Divide, square root and reciprocal square root hold the divsqrt
;; iteration unit for as many cycles as their latency.
(define_insn_reservation "r20kc_fp_divsqrt_df" 32
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "fdiv,fsqrt")
            (eq_attr "mode" "DF")))
  "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*32)")

(define_insn_reservation "r20kc_fp_divsqrt_sf" 17
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "fdiv,fsqrt")
            (eq_attr "mode" "SF")))
  "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*17)")

(define_insn_reservation "r20kc_fp_rsqrt_df" 35
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "frsqrt")
            (eq_attr "mode" "DF")))
  "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*35)")

(define_insn_reservation "r20kc_fp_rsqrt_sf" 17
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "frsqrt")
            (eq_attr "mode" "SF")))
  "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*17)")

;; FP multiply and multiply-add: single precision holds the multiply
;; iteration unit for one cycle, double precision for two.
(define_insn_reservation "r20kc_fp_mpy_sf" 4
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "fmul,fmadd")
            (eq_attr "mode" "SF")))
  "r20kc_fpmpy+r20kc_fpmpy_iter")

(define_insn_reservation "r20kc_fp_mpy_df" 5
  (and (eq_attr "cpu" "20kc")
       (and (eq_attr "type" "fmul,fmadd")
            (eq_attr "mode" "DF")))
  "r20kc_fpmpy+(r20kc_fpmpy_iter*2)")

;; Force single-dispatch for unknown or multi.
(define_insn_reservation "r20kc_unknown" 1
  (and (eq_attr "cpu" "20kc")
       (eq_attr "type" "unknown,multi,atomic,syncloop"))
  "r20kc_single_dispatch")