;; ARM Cortex-A57 pipeline description
;; Copyright (C) 2014-2020 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_automaton "cortex_a57")

;; Coarse, Cortex-A57 specific classification of Advanced SIMD / FP
;; instructions.  Each arm of the cond below folds a group of generic
;; "type" values into one class; the Advanced SIMD reservations in this
;; file select on this attribute rather than on "type" directly.
;;
;; The arms are tested in order and the first one that matches wins, so
;; every "type" value should be listed in exactly one arm.
;;
;; NOTE: the values neon_reduc_add_acc, neon_multiply_long, neon_mla_long,
;; neon_fp_minmax and neon_move are declared in the value list but are
;; never produced by the cond below.  They are kept so that the set of
;; attribute values is unchanged.
(define_attr "cortex_a57_neon_type"
  "neon_abd, neon_abd_q, neon_arith_acc, neon_arith_acc_q,
   neon_arith_basic, neon_arith_complex,
   neon_reduc_add_acc, neon_multiply, neon_multiply_q,
   neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
   neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
   neon_shift_imm_complex,
   neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
   neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
   neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
   neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
   neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
   neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
   neon_bitops, neon_bitops_q, neon_from_gp,
   neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
   neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
   neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
   unknown"
  (cond [
    ;; Integer arithmetic.
    (eq_attr "type" "neon_abd, neon_abd_long")
      (const_string "neon_abd")
    (eq_attr "type" "neon_abd_q")
      (const_string "neon_abd_q")
    (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
                     neon_reduc_add_acc_q")
      (const_string "neon_arith_acc")
    (eq_attr "type" "neon_arith_acc_q")
      (const_string "neon_arith_acc_q")
    (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\
                     neon_add_widen, neon_neg, neon_neg_q,\
                     neon_reduc_add, neon_reduc_add_q,\
                     neon_reduc_add_long, neon_sub, neon_sub_q,\
                     neon_sub_long, neon_sub_widen, neon_logic,\
                     neon_logic_q, neon_tst, neon_tst_q")
      (const_string "neon_arith_basic")
    (eq_attr "type" "neon_abs, neon_abs_q, neon_add_halve_narrow_q,\
                     neon_add_halve, neon_add_halve_q,\
                     neon_sub_halve, neon_sub_halve_q, neon_qabs,\
                     neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
                     neon_qneg_q, neon_qsub, neon_qsub_q,\
                     neon_sub_halve_narrow_q,\
                     neon_compare, neon_compare_q,\
                     neon_compare_zero, neon_compare_zero_q,\
                     neon_minmax, neon_minmax_q, neon_reduc_minmax,\
                     neon_reduc_minmax_q")
      (const_string "neon_arith_complex")

    ;; Integer multiply and multiply-accumulate.
    (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
                     neon_mul_h_scalar, neon_mul_s_scalar,\
                     neon_sat_mul_b, neon_sat_mul_h,\
                     neon_sat_mul_s, neon_sat_mul_h_scalar,\
                     neon_sat_mul_s_scalar,\
                     neon_mul_b_long, neon_mul_h_long,\
                     neon_mul_s_long, neon_mul_d_long,\
                     neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
                     neon_sat_mul_b_long, neon_sat_mul_h_long,\
                     neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
                     neon_sat_mul_s_scalar_long, crypto_pmull")
      (const_string "neon_multiply")
    (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
                     neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
                     neon_sat_mul_b_q, neon_sat_mul_h_q,\
                     neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
                     neon_sat_mul_s_scalar_q")
      (const_string "neon_multiply_q")
    (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\
                     neon_mla_h_scalar, neon_mla_s_scalar,\
                     neon_mla_b_long, neon_mla_h_long,\
                     neon_mla_s_long,\
                     neon_mla_h_scalar_long, neon_mla_s_scalar_long")
      (const_string "neon_mla")
    (eq_attr "type" "neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
                     neon_mla_h_scalar_q, neon_mla_s_scalar_q")
      (const_string "neon_mla_q")
    (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\
                     neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
                     neon_sat_mla_s_scalar_long")
      (const_string "neon_sat_mla_long")

    ;; Integer shifts.
    (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
      (const_string "neon_shift_acc")
    (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
                     neon_shift_imm_narrow_q, neon_shift_imm_long")
      (const_string "neon_shift_imm_basic")
    (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
                     neon_sat_shift_imm_narrow_q")
      (const_string "neon_shift_imm_complex")
    (eq_attr "type" "neon_shift_reg")
      (const_string "neon_shift_reg_basic")
    (eq_attr "type" "neon_shift_reg_q")
      (const_string "neon_shift_reg_basic_q")
    (eq_attr "type" "neon_sat_shift_reg")
      (const_string "neon_shift_reg_complex")
    (eq_attr "type" "neon_sat_shift_reg_q")
      (const_string "neon_shift_reg_complex_q")

    ;; Floating point.
    (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
                     neon_fp_abs_s, neon_fp_abs_s_q,\
                     neon_fp_neg_d, neon_fp_neg_d_q,\
                     neon_fp_abs_d, neon_fp_abs_d_q")
      (const_string "neon_fp_negabs")
    (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
                     neon_fp_reduc_add_s, neon_fp_compare_s,\
                     neon_fp_minmax_s, neon_fp_round_s,\
                     neon_fp_addsub_d, neon_fp_abd_d,\
                     neon_fp_reduc_add_d, neon_fp_compare_d,\
                     neon_fp_minmax_d, neon_fp_round_d,\
                     neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_d")
      (const_string "neon_fp_arith")
    (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
                     neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
                     neon_fp_minmax_s_q, neon_fp_round_s_q,\
                     neon_fp_addsub_d_q, neon_fp_abd_d_q,\
                     neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
                     neon_fp_minmax_d_q, neon_fp_round_d_q")
      (const_string "neon_fp_arith_q")
    ;; neon_fp_reduc_add_s_q and neon_fp_reduc_add_d_q are not repeated
    ;; here: the neon_fp_arith_q arm above already claims them, so a
    ;; second mention in this arm could never be reached.
    (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
                     neon_fp_reduc_minmax_d_q")
      (const_string "neon_fp_reductions_q")
    (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
                     neon_fp_to_int_d, neon_int_to_fp_d")
      (const_string "neon_fp_cvt_int")
    (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
                     neon_fp_to_int_d_q, neon_int_to_fp_d_q")
      (const_string "neon_fp_cvt_int_q")
    (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
      (const_string "neon_fp_cvt16")
    (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
                     neon_fp_mul_d")
      (const_string "neon_fp_mul")
    (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
                     neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
      (const_string "neon_fp_mul_q")
    (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
                     neon_fp_mla_d")
      (const_string "neon_fp_mla")
    (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,\
                     neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
      (const_string "neon_fp_mla_q")
    (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
                     neon_fp_recpx_s,\
                     neon_fp_recpe_d, neon_fp_rsqrte_d,\
                     neon_fp_recpx_d")
      (const_string "neon_fp_recpe_rsqrte")
    (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
                     neon_fp_recpx_s_q,\
                     neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
                     neon_fp_recpx_d_q")
      (const_string "neon_fp_recpe_rsqrte_q")
    (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
                     neon_fp_recps_d, neon_fp_rsqrts_d")
      (const_string "neon_fp_recps_rsqrts")
    (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
                     neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
      (const_string "neon_fp_recps_rsqrts_q")

    ;; Bit operations, permutes and register transfers.
    (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
                     neon_rev, neon_permute, neon_rbit,\
                     neon_tbl1, neon_tbl2, neon_zip,\
                     neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
                     neon_move, neon_move_q, neon_move_narrow_q")
      (const_string "neon_bitops")
    (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
                     neon_rev_q, neon_permute_q, neon_rbit_q")
      (const_string "neon_bitops_q")
    (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
      (const_string "neon_from_gp")
    (eq_attr "type" "neon_from_gp_q")
      (const_string "neon_from_gp_q")
    (eq_attr "type" "neon_tbl3, neon_tbl4")
      (const_string "neon_tbl3_tbl4")
    (eq_attr "type" "neon_zip_q")
      (const_string "neon_zip_q")
    (eq_attr "type" "neon_to_gp, neon_to_gp_q,f_mrc,f_mrrc")
      (const_string "neon_to_gp")

    ;; Loads.
    (eq_attr "type" "f_loads, f_loadd,\
                     neon_load1_1reg, neon_load1_1reg_q,\
                     neon_load1_2reg, neon_load1_2reg_q")
      (const_string "neon_load_a")
    (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
                     neon_load1_4reg, neon_load1_4reg_q")
      (const_string "neon_load_b")
    (eq_attr "type" "neon_ldp, neon_ldp_q,\
                     neon_load1_one_lane, neon_load1_one_lane_q,\
                     neon_load1_all_lanes, neon_load1_all_lanes_q,\
                     neon_load2_2reg, neon_load2_2reg_q,\
                     neon_load2_all_lanes, neon_load2_all_lanes_q")
      (const_string "neon_load_c")
    (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
                     neon_load3_3reg, neon_load3_3reg_q,\
                     neon_load3_one_lane, neon_load3_one_lane_q,\
                     neon_load4_4reg, neon_load4_4reg_q")
      (const_string "neon_load_d")
    (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
                     neon_load3_all_lanes, neon_load3_all_lanes_q,\
                     neon_load4_all_lanes, neon_load4_all_lanes_q")
      (const_string "neon_load_e")
    (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
      (const_string "neon_load_f")

    ;; Stores.
    (eq_attr "type" "f_stores, f_stored,\
                     neon_store1_1reg")
      (const_string "neon_store_a")
    (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
      (const_string "neon_store_b")
    (eq_attr "type" "neon_stp, neon_stp_q,\
                     neon_store1_3reg, neon_store1_3reg_q,\
                     neon_store3_3reg, neon_store3_3reg_q,\
                     neon_store2_4reg, neon_store2_4reg_q,\
                     neon_store4_4reg, neon_store4_4reg_q,\
                     neon_store2_2reg, neon_store2_2reg_q,\
                     neon_store3_one_lane, neon_store3_one_lane_q,\
                     neon_store4_one_lane, neon_store4_one_lane_q,\
                     neon_store1_4reg, neon_store1_4reg_q,\
                     neon_store1_one_lane, neon_store1_one_lane_q,\
                     neon_store2_one_lane, neon_store2_one_lane_q")
      (const_string "neon_store_complex")
;; If it doesn't match any of the above that we want to treat specially but is
;; still a NEON type, treat it as a basic NEON type.  This is better than
;; dropping it on the floor and making no assumptions about it whatsoever.
    (eq_attr "is_neon_type" "yes")
      (const_string "neon_arith_basic")]
    (const_string "unknown")))

;; The Cortex-A57 core is modelled as a triple issue pipeline that has
;; the following functional units.
;; 1. Two pipelines for integer operations: SX1, SX2

(define_cpu_unit "ca57_sx1_issue" "cortex_a57")
(define_reservation "ca57_sx1" "ca57_sx1_issue")

(define_cpu_unit "ca57_sx2_issue" "cortex_a57")
(define_reservation "ca57_sx2" "ca57_sx2_issue")

;; 2. One pipeline for complex integer operations: MX

(define_cpu_unit "ca57_mx_issue"
                 "cortex_a57")
(define_reservation "ca57_mx" "ca57_mx_issue")
(define_reservation "ca57_mx_block" "ca57_mx_issue")

;; 3. Two asymmetric pipelines for Neon and FP operations: CX1, CX2
;;    These units live in their own automaton, "cortex_a57_cx".
(define_automaton "cortex_a57_cx")

(define_cpu_unit "ca57_cx1_issue"
                 "cortex_a57_cx")
(define_cpu_unit "ca57_cx2_issue"
                 "cortex_a57_cx")

(define_reservation "ca57_cx1" "ca57_cx1_issue")

(define_reservation "ca57_cx2" "ca57_cx2_issue")
;; Holds the CX2 issue unit for two consecutive cycles.
(define_reservation "ca57_cx2_block" "ca57_cx2_issue*2")

;; 4. One pipeline for branch operations: BX

(define_cpu_unit "ca57_bx_issue" "cortex_a57")
(define_reservation "ca57_bx" "ca57_bx_issue")

;; 5. Two pipelines for load and store operations: LS1, LS2.  The most
;;    valuable thing we can do is force a structural hazard to split
;;    up loads/stores.

;; One shared load/store issue unit, plus separate load and store units.
(define_cpu_unit "ca57_ls_issue" "cortex_a57")
(define_cpu_unit "ca57_ldr, ca57_str" "cortex_a57")

;; A load issues, then occupies the load unit for two cycles.
(define_reservation "ca57_load_model" "ca57_ls_issue,ca57_ldr*2")
;; A store issues, then occupies the store unit for one cycle.
(define_reservation "ca57_store_model" "ca57_ls_issue,ca57_str")

;; Reserve the CX1, CX2, MX, SX1, SX2 and LS issue units all at once.

(define_reservation "ca57_block"
  "ca57_cx1_issue + ca57_cx2_issue
   + ca57_mx_issue + ca57_sx1_issue
   + ca57_sx2_issue + ca57_ls_issue")

;; Simple Execution Unit:
;;
;; Plain ALU operations (no shifted operand); either SX pipe will do.
(define_insn_reservation "cortex_a57_alu" 2
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
                     alu_sreg,alus_sreg,logic_reg,logics_reg,\
                     adc_imm,adcs_imm,adc_reg,adcs_reg,\
                     adr,bfx,extend,clz,rbit,rev,alu_dsp_reg,\
                     rotate_imm,shift_imm,shift_reg,\
                     mov_imm,mov_reg,\
                     mvn_imm,mvn_reg,\
                     mrs,multiple"))
  "ca57_sx1|ca57_sx2")

;; ALU operations whose operand is shifted by an immediate; MX pipe.
(define_insn_reservation "cortex_a57_alu_shift" 3
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "type" "bfm,\
                     alu_shift_imm,alus_shift_imm,\
                     crc,logic_shift_imm,logics_shift_imm,\
                     mov_shift,mvn_shift"))
  "ca57_mx")

;; Multi-Cycle Execution Unit:
;;
;; ALU operations whose shift amount comes from a register; MX pipe.
(define_insn_reservation "cortex_a57_alu_shift_reg" 3
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "type" "alu_shift_reg,alus_shift_reg,\
                     logic_shift_reg,logics_shift_reg,\
                     mov_shift_reg,mvn_shift_reg"))
  "ca57_mx")

;; Every multiply, selected through the mul32 / widen_mul64 attributes.
;; TODO: AArch32 and AArch64 have different behavior
(define_insn_reservation "cortex_a57_mult32" 3
  (and
    (eq_attr "tune" "cortexa57")
    (ior
      (eq_attr "mul32" "yes")
      (eq_attr "widen_mul64" "yes")))
  "ca57_mx")

;; Integer division: one MX issue cycle followed by three blocked cycles.
(define_insn_reservation "cortex_a57_div" 10
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "type" "udiv,sdiv"))
  "ca57_mx_issue,ca57_mx_block*3")

;; Block all issue pipes for a cycle
(define_insn_reservation "cortex_a57_block" 1
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "type" "block"))
  "ca57_block")

;; Branch execution Unit
;;
;; A branch occupies a single issue slot on BX.  It produces no result,
;; hence the zero latency.
(define_insn_reservation "cortex_a57_branch" 0
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "type" "branch"))
  "ca57_bx")

;; Load-store execution Unit
;;
;; Loads of one or two words (and byte loads).
(define_insn_reservation "cortex_a57_load1" 5
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "type" "load_byte,load_4,load_8"))
  "ca57_load_model")

;; Loads of three or four words: two extra LS issue cycles up front.
(define_insn_reservation "cortex_a57_load3" 5
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "type" "load_12,load_16"))
  "ca57_ls_issue*2,ca57_load_model")

;; Stores of one or two words.
(define_insn_reservation "cortex_a57_store1" 0
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "type" "store_4,store_8"))
  "ca57_store_model")

;; Stores of three or four words: two extra LS issue cycles up front.
(define_insn_reservation "cortex_a57_store3" 0
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "type" "store_12,store_16"))
  "ca57_ls_issue*2,ca57_store_model")

;; Advanced SIMD Unit - Integer Arithmetic Instructions.

;; Absolute difference: either CX pipe.
(define_insn_reservation "cortex_a57_neon_abd" 5
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_abd"))
  "ca57_cx1|ca57_cx2")

;; Q-form absolute difference: both CX pipes in the same cycle.
(define_insn_reservation "cortex_a57_neon_abd_q" 5
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_abd_q"))
  "ca57_cx1+ca57_cx2")

;; Accumulating arithmetic (class neon_arith_acc): CX2 only.
(define_insn_reservation "cortex_a57_neon_aba" 7
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_arith_acc"))
  "ca57_cx2")

;; Q-form accumulating arithmetic: CX2 for two consecutive cycles.
(define_insn_reservation "cortex_a57_neon_aba_q" 8
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_arith_acc_q"))
  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")

;; Basic arithmetic; also the class used for otherwise unclassified
;; NEON types.
(define_insn_reservation "cortex_a57_neon_arith_basic" 4
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_arith_basic"))
  "ca57_cx1|ca57_cx2")

;; Complex arithmetic: one cycle more latency than the basic class.
(define_insn_reservation "cortex_a57_neon_arith_complex" 5
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_arith_complex"))
  "ca57_cx1|ca57_cx2")

;; Integer Multiply Instructions.

;; Integer multiplies and multiply-accumulates all go down CX1; the
;; Q forms hold it for a second cycle.
(define_insn_reservation "cortex_a57_neon_multiply" 6
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_multiply"))
  "ca57_cx1")

(define_insn_reservation "cortex_a57_neon_multiply_q" 7
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_multiply_q"))
  "ca57_cx1+(ca57_cx1_issue,ca57_cx1)")

(define_insn_reservation "cortex_a57_neon_mla" 6
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_mla"))
  "ca57_cx1")

(define_insn_reservation "cortex_a57_neon_mla_q" 7
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_mla_q"))
  "ca57_cx1+(ca57_cx1_issue,ca57_cx1)")

(define_insn_reservation "cortex_a57_neon_sat_mla_long" 6
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_sat_mla_long"))
  "ca57_cx1")

;; Integer Shift Instructions.
;; All shifts use CX2; the Q forms of the register-controlled shifts
;; hold it for a second cycle.

(define_insn_reservation "cortex_a57_neon_shift_acc" 7
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_shift_acc"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_neon_shift_imm_basic" 4
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_shift_imm_basic"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_neon_shift_imm_complex" 5
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_shift_imm_complex"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_neon_shift_reg_basic" 4
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_shift_reg_basic"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_neon_shift_reg_basic_q" 5
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_shift_reg_basic_q"))
  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_shift_reg_complex" 5
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_shift_reg_complex"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_neon_shift_reg_complex_q" 6
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_shift_reg_complex_q"))
  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")

;; Floating Point Instructions.
;; Unless noted otherwise, the non-Q classes take either CX pipe and
;; the Q classes take both at once.

(define_insn_reservation "cortex_a57_neon_fp_negabs" 4
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_negabs"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_arith" 6
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_arith"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_arith_q" 6
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_arith_q"))
  "(ca57_cx1+ca57_cx2)")

;; Both pipes in the first cycle, then either pipe in the next.
(define_insn_reservation "cortex_a57_neon_fp_reductions_q" 10
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_reductions_q"))
  "(ca57_cx1+ca57_cx2),(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_cvt_int" 6
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_cvt_int"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_cvt_int_q" 6
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_cvt_int_q"))
  "(ca57_cx1+ca57_cx2)")

;; Both CX issue units in the first cycle, then either pipe in the next.
(define_insn_reservation "cortex_a57_neon_fp_cvt16" 10
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_cvt16"))
  "(ca57_cx1_issue+ca57_cx2_issue),(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_mul" 5
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_mul"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_mul_q" 5
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_mul_q"))
  "(ca57_cx1+ca57_cx2)")

;; Two consecutive cycles on the same pipe, whichever one is chosen.
(define_insn_reservation "cortex_a57_neon_fp_mla" 9
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_mla"))
  "(ca57_cx1,ca57_cx1)|(ca57_cx2,ca57_cx2)")

;; Both pipes in the first cycle, then CX1, then CX2.
(define_insn_reservation "cortex_a57_neon_fp_mla_q" 9
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_mla_q"))
  "(ca57_cx1+ca57_cx2),(ca57_cx1,ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_recpe_rsqrte" 6
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_recpe_rsqrte"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_recpe_rsqrte_q" 6
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_recpe_rsqrte_q"))
  "(ca57_cx1+ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_recps_rsqrts" 10
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_recps_rsqrts"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_recps_rsqrts_q" 10
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_fp_recps_rsqrts_q"))
  "(ca57_cx1+ca57_cx2)")

;; Miscellaneous Instructions.

;; Bit operations and permutes: either CX pipe, or both for the Q class.
(define_insn_reservation "cortex_a57_neon_bitops" 4
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_bitops"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_bitops_q" 4
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_bitops_q"))
  "(ca57_cx1+ca57_cx2)")

;; Class neon_from_gp: the LS issue unit together with one CX issue
;; unit, then that same CX pipe again.
(define_insn_reservation "cortex_a57_neon_from_gp" 9
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_from_gp"))
  "(ca57_ls_issue+ca57_cx1_issue,ca57_cx1)
   |(ca57_ls_issue+ca57_cx2_issue,ca57_cx2)")

;; As above, but the Q class reserves both CX pipes rather than one.
(define_insn_reservation "cortex_a57_neon_from_gp_q" 9
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_from_gp_q"))
  "(ca57_ls_issue+ca57_cx1_issue,ca57_cx1)
   +(ca57_ls_issue+ca57_cx2_issue,ca57_cx2)")

;; Three- and four-register table lookups: both CX pipes for two cycles.
(define_insn_reservation "cortex_a57_neon_tbl3_tbl4" 7
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_tbl3_tbl4"))
  "(ca57_cx1_issue,ca57_cx1)
   +(ca57_cx2_issue,ca57_cx2)")

;; Q-form zip: same reservation as the tbl3/tbl4 class.
(define_insn_reservation "cortex_a57_neon_zip_q" 7
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_zip_q"))
  "(ca57_cx1_issue,ca57_cx1)
   +(ca57_cx2_issue,ca57_cx2)")

;; Class neon_to_gp: the LS issue unit together with one SX issue
;; unit, then that same SX pipe again.
(define_insn_reservation "cortex_a57_neon_to_gp" 7
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_to_gp"))
  "((ca57_ls_issue+ca57_sx1_issue),ca57_sx1)
   |((ca57_ls_issue+ca57_sx2_issue),ca57_sx2)")

;; Load Instructions.

;; Class A loads use the plain load model.
(define_insn_reservation "cortex_a57_neon_load_a" 6
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_load_a"))
  "ca57_load_model")

;; Class B loads hold the LS issue unit for a second cycle, overlapping
;; the first load-unit cycle.
(define_insn_reservation "cortex_a57_neon_load_b" 7
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_load_b"))
  "ca57_ls_issue,ca57_ls_issue+ca57_ldr,ca57_ldr*2")

;; Class C loads: the load model plus one CX pipe.
(define_insn_reservation "cortex_a57_neon_load_c" 9
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_load_c"))
  "ca57_load_model+(ca57_cx1|ca57_cx2)")

;; Class D loads: both CX issue units first, then LS issue, then two
;; cycles of the load unit.
(define_insn_reservation "cortex_a57_neon_load_d" 11
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_load_d"))
  "ca57_cx1_issue+ca57_cx2_issue,
   ca57_ls_issue+ca57_ls_issue,ca57_ldr*2")

;; Class E loads: same reservation and latency as class C.
(define_insn_reservation "cortex_a57_neon_load_e" 9
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_load_e"))
  "ca57_load_model+(ca57_cx1|ca57_cx2)")

;; Class F loads: same reservation and latency as class D.
(define_insn_reservation "cortex_a57_neon_load_f" 11
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_load_f"))
  "ca57_cx1_issue+ca57_cx2_issue,
   ca57_ls_issue+ca57_ls_issue,ca57_ldr*2")

;; Store Instructions.

;; Class A and class B stores both use the plain store model.
(define_insn_reservation "cortex_a57_neon_store_a" 0
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_store_a"))
  "ca57_store_model")

(define_insn_reservation "cortex_a57_neon_store_b" 0
  (and
    (eq_attr "tune" "cortexa57")
    (eq_attr "cortex_a57_neon_type" "neon_store_b"))
  "ca57_store_model")

;; These block issue for a number of cycles proportional to the number
;; of 64-bit chunks they will store, we don't attempt to model that
;; precisely, treat them as blocking execution for two cycles when
;; issued.
(define_insn_reservation "cortex_a57_neon_store_complex" 0
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_store_complex"))
  "ca57_block*2")

;; Floating-Point Operations.

;; FP constant moves: either CX pipe.
(define_insn_reservation "cortex_a57_fp_const" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fconsts,fconstd"))
  "(ca57_cx1|ca57_cx2)")

;; FP add/subtract.
(define_insn_reservation "cortex_a57_fp_add_sub" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fadds,faddd"))
  "(ca57_cx1|ca57_cx2)")

;; FP multiply.
(define_insn_reservation "cortex_a57_fp_mul" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fmuls,fmuld"))
  "(ca57_cx1|ca57_cx2)")

;; FP multiply-accumulate: the chosen CX pipe is used in the first
;; cycle and again in the fourth, with two idle cycles in between.
(define_insn_reservation "cortex_a57_fp_mac" 10
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
  "(ca57_cx1,nothing,nothing,ca57_cx1) \
   |(ca57_cx2,nothing,nothing,ca57_cx2)")

;; FP conversions.
(define_insn_reservation "cortex_a57_fp_cvt" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
  "(ca57_cx1|ca57_cx2)")

;; FP compares and conditional compares: CX2 only.
(define_insn_reservation "cortex_a57_fp_cmp" 7
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_fp_arith" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "ffariths,ffarithd"))
  "(ca57_cx1|ca57_cx2)")

;; FP register moves and conditional selects.
(define_insn_reservation "cortex_a57_fp_cpys" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fmov,fcsel"))
  "(ca57_cx1|ca57_cx2)")

;; Divide and square root.  All three reservations below tie up CX2
;; through ca57_cx2_block.  Insn reservations are tried in the order
;; they appear in the file, so each "type" value must be listed in
;; exactly one of them.

;; Single precision, scalar and D-form vector.
(define_insn_reservation "cortex_a57_fp_divs" 12
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fdivs, fsqrts,\
                        neon_fp_div_s, neon_fp_sqrt_s"))
  "ca57_cx2_block*5")

;; Double precision, scalar and D-form vector.
(define_insn_reservation "cortex_a57_fp_divd" 16
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fdivd, fsqrtd, neon_fp_div_d, neon_fp_sqrt_d"))
  "ca57_cx2_block*3")

;; Q-form vector divide and square root.  fdivd and fsqrtd used to be
;; listed here as well, but cortex_a57_fp_divd above already matches
;; them, so those entries were unreachable and have been dropped.
(define_insn_reservation "cortex_a57_neon_fp_div_q" 20
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "neon_fp_div_s_q, neon_fp_div_d_q,\
                        neon_fp_sqrt_s_q, neon_fp_sqrt_d_q"))
  "ca57_cx2_block*3")

;; Crypto operations.
(define_insn_reservation "cortex_a57_crypto_simple" 3
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "crypto_aese,crypto_aesmc,crypto_sha1_fast,crypto_sha256_fast"))
  "ca57_cx1")

(define_insn_reservation "cortex_a57_crypto_complex" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
  "ca57_cx1*2")

(define_insn_reservation "cortex_a57_crypto_xor" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "crypto_sha1_xor"))
  "(ca57_cx1*2)|(ca57_cx2*2)")

;; We lie with calls.  They take up all issue slots, but are otherwise
;; not harmful.
(define_insn_reservation "cortex_a57_call" 1
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "call"))
  "ca57_sx1_issue+ca57_sx2_issue+ca57_cx1_issue+ca57_cx2_issue\
   +ca57_mx_issue+ca57_bx_issue+ca57_ls_issue"
)

;; Simple execution unit bypasses
;; Results of the ALU classes reach a dependent ALU-class instruction
;; or a dependent load one cycle sooner than the default latency.
(define_bypass 1 "cortex_a57_alu"
               "cortex_a57_alu,cortex_a57_alu_shift,cortex_a57_alu_shift_reg")
(define_bypass 2 "cortex_a57_alu_shift"
               "cortex_a57_alu,cortex_a57_alu_shift,cortex_a57_alu_shift_reg")
(define_bypass 2 "cortex_a57_alu_shift_reg"
               "cortex_a57_alu,cortex_a57_alu_shift,cortex_a57_alu_shift_reg")
(define_bypass 1 "cortex_a57_alu" "cortex_a57_load1,cortex_a57_load3")
(define_bypass 2 "cortex_a57_alu_shift" "cortex_a57_load1,cortex_a57_load3")
(define_bypass 2 "cortex_a57_alu_shift_reg"
               "cortex_a57_load1,cortex_a57_load3")

;; An MLA or a MUL can feed a dependent MLA.
;; Any NEON reservation whose name contains "mla" or "mul" forwards to
;; a dependent NEON "mla" reservation with a latency of five cycles.
(define_bypass 5
  "cortex_a57_neon_*mla*,cortex_a57_neon_*mul*"
  "cortex_a57_neon_*mla*")

;; The scalar FP multiply and multiply-accumulate forward to a
;; dependent multiply-accumulate in the same way.
(define_bypass 5
  "cortex_a57_fp_mul,cortex_a57_fp_mac"
  "cortex_a57_fp_mac")

;; We don't need to care about control hazards, either the branch is
;; predicted in which case we pay no penalty, or the branch is
;; mispredicted in which case instruction scheduling will be unlikely to
;; help.
(define_bypass 1
  "cortex_a57_*"
  "cortex_a57_call,cortex_a57_branch")