;; Cavium ThunderX pipeline description
;; Copyright (C) 2014-2015 Free Software Foundation, Inc.
;;
;; Written by Andrew Pinski  <apinski@cavium.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.

;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.


;; Thunder is a dual-issue processor that can issue all instructions on
;; pipe0 and a subset on pipe1.


;; Four automata are declared: one holding the two issue pipes and one
;; each for the multiply, divide and SIMD units.
(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd")

(define_cpu_unit "thunderx_pipe0" "thunderx_main")
(define_cpu_unit "thunderx_pipe1" "thunderx_main")
(define_cpu_unit "thunderx_mult" "thunderx_mult")
(define_cpu_unit "thunderx_divide" "thunderx_divide")
(define_cpu_unit "thunderx_simd" "thunderx_simd")

;; Simple ALU, logic, move and extend instructions: one cycle on either pipe.
(define_insn_reservation "thunderx_add" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_sreg,alus_imm,alus_sreg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg"))
  "thunderx_pipe0 | thunderx_pipe1")

;; Shifts and bitfield moves: one cycle on either pipe.  The "extend" type
;; is not repeated here because thunderx_add already covers it with the
;; same latency and reservation.
(define_insn_reservation "thunderx_shift" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "bfm,shift_imm,shift_reg"))
  "thunderx_pipe0 | thunderx_pipe1")


;; Arithmetic instructions with an extra shift or extend are two cycles.
;; FIXME: This needs more attributes on aarch64 than what is currently there;
;;    this is conservative for now.
;; Except this is not correct as this is only for !(LSL && shift by 0/1/2/3)
;; Except this is not correct as this is only for !(zero extend)

(define_insn_reservation "thunderx_arith_shift" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm"))
  "thunderx_pipe0 | thunderx_pipe1")

;; Conditional selects are two cycles on either pipe.
(define_insn_reservation "thunderx_csel" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "csel"))
  "thunderx_pipe0 | thunderx_pipe1")

;; Multiply, multiply-accumulate and count leading zeros can only happen
;; on pipe 1.

(define_insn_reservation "thunderx_mul" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
  "thunderx_pipe1 + thunderx_mult")

;; Multiply high instructions take an extra cycle and cause the multiply
;; unit to be busy for an extra cycle.  The reservation below is disabled;
;; smull/umull are currently handled by thunderx_mul above.

;(define_insn_reservation "thunderx_mul_high" 5
;  (and (eq_attr "tune" "thunderx")
;       (eq_attr "type" "smull,umull"))
;  "thunderx_pipe1 + thunderx_mult")

;; Integer divides issue on pipe 1 and keep the divide unit busy for the
;; whole 22-cycle latency.
(define_insn_reservation "thunderx_div32" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "udiv,sdiv"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 21")

;(define_insn_reservation "thunderx_div64" 38
;  (and (eq_attr "tune" "thunderx")
;       (eq_attr "type" "udiv,sdiv")
;       (eq_attr "mode" "DI"))
;  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34")

;; Stores take one cycle in pipe 0
(define_insn_reservation "thunderx_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store1"))
  "thunderx_pipe0")

;; Store pairs are single issued
(define_insn_reservation "thunderx_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store2"))
  "thunderx_pipe0 + thunderx_pipe1")


;; Loads (and load pairs) from L1 take 3 cycles in pipe 0
(define_insn_reservation "thunderx_load" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "load1, load2"))
  "thunderx_pipe0")

;; Branches, traps and calls take one cycle in pipe 1
(define_insn_reservation "thunderx_brj" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "branch,trap,call"))
  "thunderx_pipe1")

;; FPU

(define_insn_reservation "thunderx_fadd" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "faddd,fadds"))
  "thunderx_pipe1")

(define_insn_reservation "thunderx_fconst" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fconsts,fconstd"))
  "thunderx_pipe1")

;; Moves between fp are 2 cycles including min/max/select/abs/neg
(define_insn_reservation "thunderx_fmov" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmov,f_minmaxs,f_minmaxd,fcsel,ffarithd,ffariths"))
  "thunderx_pipe1")

(define_insn_reservation "thunderx_fmovgpr" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_mrc, f_mcr"))
  "thunderx_pipe1")

(define_insn_reservation "thunderx_fmul" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
  "thunderx_pipe1")

;; FP divide and square root issue on pipe 1 and share the divide unit
;; with the integer divides.
(define_insn_reservation "thunderx_fdivs" 12
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivs"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*8")

(define_insn_reservation "thunderx_fdivd" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*18")

(define_insn_reservation "thunderx_fsqrts" 17
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrts"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*13")

;; NOTE(review): this reserves the divide unit for 1 + 31 cycles, which is
;; longer than the 28-cycle latency.  The three reservations above all hold
;; the unit for (latency - 3) cycles in total, which would give
;; "thunderx_divide*24" here.  Left unchanged; confirm against the hardware
;; documentation before altering.
(define_insn_reservation "thunderx_fsqrtd" 28
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrtd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*31")

;; The rounding conversion inside fp is 4 cycles
(define_insn_reservation "thunderx_frint" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_rints,f_rintd"))
  "thunderx_pipe1")

;; Float to integer with a move from int to/from float is 6 cycles
(define_insn_reservation "thunderx_f_cvt" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
  "thunderx_pipe1")

;; FP/SIMD load/stores happen in pipe 0
;; 64bit Loads register/pairs are 4 cycles from L1
(define_insn_reservation "thunderx_64simd_fp_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\
			neon_load1_1reg_q,neon_load1_2reg"))
  "thunderx_pipe0")

;; 128bit load pair is single issued and 4 cycles from L1
(define_insn_reservation "thunderx_128simd_pair_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_load1_2reg_q"))
  "thunderx_pipe0+thunderx_pipe1")

;; FP/SIMD Stores takes one cycle in pipe 0
(define_insn_reservation "thunderx_simd_fp_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q"))
  "thunderx_pipe0")

;; 64bit neon store pairs are single issue for one cycle
(define_insn_reservation "thunderx_64neon_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg"))
  "thunderx_pipe0 + thunderx_pipe1")

;; 128bit neon store pair are single issued for two cycles
(define_insn_reservation "thunderx_128neon_storepair" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg_q"))
  "(thunderx_pipe0 + thunderx_pipe1)*2")


;; SIMD/NEON (q forms take an extra cycle)

;; Thunder simd move instruction types - 2/3 cycles
(define_insn_reservation "thunderx_neon_move" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
			neon_fp_compare_d, neon_move"))
  "thunderx_pipe1 + thunderx_simd")

(define_insn_reservation "thunderx_neon_move_q" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
			neon_fp_compare_d_q, neon_move_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")


;; Thunder simd simple/add instruction types - 4/5 cycles

(define_insn_reservation "thunderx_neon_add" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \
			neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \
			neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
			neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
			neon_fp_minmax_s, neon_fp_minmax_d, neon_cls, \
			neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d"))
  "thunderx_pipe1 + thunderx_simd")

;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form which is incorrect

(define_insn_reservation "thunderx_neon_add_q" 5
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \
			neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \
			neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \
			neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
			neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_cls_q, \
			neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
			neon_add_long, neon_sub_long"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")


;; Thunder 128bit SIMD reads the upper half in cycle 2 and writes in the
;; last cycle, so a dependent q-form consumer sees a latency one cycle
;; shorter than the producer's default.
(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q")
(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q")

;; Assume both pipes are needed for unknown and multiple-instruction
;; patterns.

(define_insn_reservation "thunderx_unknown" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "untyped,multiple"))
  "thunderx_pipe0 + thunderx_pipe1")

