;; Scheduling description for UltraSPARC-I/II.
;;   Copyright (C) 2002 Free Software Foundation, Inc.
;;
;; This file is part of GNU CC.
;;
;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;; UltraSPARC-I and II are quad-issue processors.  Interesting features
;; to note:
;;
;; - Buffered loads: they can queue waiting for the actual data until
;;   an instruction actually tries to reference the destination register
;;   as an input.
;; - Two integer units.  Only one of them can do shifts, and the other
;;   is the only one which may do condition code setting instructions.
;;   Complicating things further, a shift may go only into the first
;;   slot in a dispatched group.  And if you have a non-condition-code
;;   setting instruction and one that does set the condition codes, the
;;   former must be issued first in order for both of them to issue.
;; - Stores can issue before the value being stored is available.  As long
;;   as the input data becomes ready before the store is to move out of the
;;   store buffer, it will not cause a stall.
;; - Branches may issue in the same cycle as an instruction setting the
;;   condition codes being tested by that branch.  This does not apply
;;   to floating point, only integer.
;; Two automata are declared.  us1_fdivider and us1_fpm are placed in
;; ultrasparc_0; every other unit is placed in ultrasparc_1.
(define_automaton "ultrasparc_0,ultrasparc_1")

(define_cpu_unit "us1_fdivider,us1_fpm" "ultrasparc_0")
(define_cpu_unit "us1_fpa,us1_load_writeback" "ultrasparc_1")
(define_cpu_unit "us1_fps_0,us1_fps_1,us1_fpd_0,us1_fpd_1" "ultrasparc_1")
(define_cpu_unit "us1_slot0,us1_slot1,us1_slot2,us1_slot3" "ultrasparc_1")
(define_cpu_unit "us1_ieu0,us1_ieu1,us1_cti,us1_lsu" "ultrasparc_1")

;; Issue-slot helpers:
;;   us1_slot012       - any one of the first three slots
;;   us1_slotany       - any one of the four slots
;;   us1_single_issue  - all four slots at once
(define_reservation "us1_slot012"
  "(us1_slot0 | us1_slot1 | us1_slot2)")
(define_reservation "us1_slotany"
  "(us1_slot0 | us1_slot1 | us1_slot2 | us1_slot3)")
(define_reservation "us1_single_issue"
  "us1_slot0 + us1_slot1 + us1_slot2 + us1_slot3")

;; Precision markers: one of the two "single" units or one of the two
;; "double" units.
(define_reservation "us1_fp_single" "(us1_fps_0 | us1_fps_1)")
(define_reservation "us1_fp_double" "(us1_fpd_0 | us1_fpd_1)")

;; Simplified model of mixing FP precisions.  In most cases, switching
;; from an insn of one FP precision to an insn of the other merely
;; breaks up the insn group.  In some cases, however, the second insn
;; stalls for 2 additional cycles.
(exclusion_set "us1_fps_0,us1_fps_1" "us1_fpd_0,us1_fpd_1")

;; When an ieu1-specific instruction is to be scheduled and ieu0 is
;; wanted in the same cycle, ieu0 has to be reserved first.  So this
;; sequence cannot be scheduled:
;;	COMPARE		IEU1
;;	IALU		IEU0
;; whereas this one can be scheduled together:
;;	IALU		IEU0
;;	COMPARE		IEU1
;; In other words, ieu0 must be reserved before ieu1 whenever both
;; need to be reserved.
(absence_set "us1_ieu0" "us1_ieu1")

;; This defines the slotting order.  Most IEU instructions can only
;; execute in the first three slots, FPU and branches can go into
;; any slot.  We represent instructions which "break the group"
;; as requiring reservation of us1_slot0.
(absence_set "us1_slot0" "us1_slot1,us1_slot2,us1_slot3")
(absence_set "us1_slot1" "us1_slot2,us1_slot3")
(absence_set "us1_slot2" "us1_slot3")

;; Insn types multi, flushw, iflush and trap take all four slots.
(define_insn_reservation "us1_single" 1
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "multi,flushw,iflush,trap"))
  "us1_single_issue")

;; Plain integer ALU insns: either integer unit, one of slots 0-2.
(define_insn_reservation "us1_simple_ieuN" 1
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "ialu"))
  "(us1_ieu0 | us1_ieu1) + us1_slot012")

;; Shifts: ieu0 only, one of slots 0-2.
(define_insn_reservation "us1_simple_ieu0" 1
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "shift"))
  "us1_ieu0 + us1_slot012")

;; Compares: ieu1 only, one of slots 0-2.
(define_insn_reservation "us1_simple_ieu1" 1
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "compare"))
  "us1_ieu1 + us1_slot012")

;; ialuX, cmove, imul and idiv are all modelled as taking every slot
;; in their issue cycle.  cmove has latency 2; the others have latency 1.
(define_insn_reservation "us1_ialuX" 1
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "ialuX"))
  "us1_single_issue")

(define_insn_reservation "us1_cmove" 2
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "cmove"))
  "us1_single_issue, nothing")

(define_insn_reservation "us1_imul" 1
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "imul"))
  "us1_single_issue")

(define_insn_reservation "us1_idiv" 1
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "idiv"))
  "us1_single_issue")

;; For loads, the "delayed return mode" behavior of the chip
;; is represented using the us1_load_writeback resource.
;; Loads take the LSU plus one of slots 0-2 in the issue cycle and
;; us1_load_writeback afterwards.  The sload variant has one idle
;; cycle between the two and one more cycle of latency.
(define_insn_reservation "us1_load" 2
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "load,fpload"))
  "us1_lsu + us1_slot012, us1_load_writeback")

(define_insn_reservation "us1_load_signed" 3
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "sload"))
  "us1_lsu + us1_slot012, nothing, us1_load_writeback")

;; Stores take the LSU plus one of slots 0-2.
(define_insn_reservation "us1_store" 1
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "store,fpstore"))
  "us1_lsu + us1_slot012")

;; Conditional branches take the CTI unit and any slot.
(define_insn_reservation "us1_branch" 1
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "branch"))
  "us1_cti + us1_slotany")

;; Calls and unconditional branches take the CTI unit, ieu1 and
;; us1_slot0.
(define_insn_reservation "us1_call_jmpl" 1
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch"))
  "us1_cti + us1_ieu1 + us1_slot0")

;; FP moves: us1_fpa, the precision marker unit and any slot.
(define_insn_reservation "us1_fmov_single" 1
  (and (eq_attr "cpu" "ultrasparc")
       (and (eq_attr "type" "fpmove")
            (eq_attr "fptype" "single")))
  "us1_fpa + us1_fp_single + us1_slotany")

(define_insn_reservation "us1_fmov_double" 1
  (and (eq_attr "cpu" "ultrasparc")
       (and (eq_attr "type" "fpmove")
            (eq_attr "fptype" "double")))
  "us1_fpa + us1_fp_double + us1_slotany")

;; FP conditional moves: same units as FP moves, latency 2.
(define_insn_reservation "us1_fcmov_single" 2
  (and (eq_attr "cpu" "ultrasparc")
       (and (eq_attr "type" "fpcmove,fpcrmove")
            (eq_attr "fptype" "single")))
  "us1_fpa + us1_fp_single + us1_slotany, nothing")

(define_insn_reservation "us1_fcmov_double" 2
  (and (eq_attr "cpu" "ultrasparc")
       (and (eq_attr "type" "fpcmove,fpcrmove")
            (eq_attr "fptype" "double")))
  "us1_fpa + us1_fp_double + us1_slotany, nothing")

;; Insns of type "fp": us1_fpa in the issue cycle, latency 4.
(define_insn_reservation "us1_faddsub_single" 4
  (and (eq_attr "cpu" "ultrasparc")
       (and (eq_attr "type" "fp")
            (eq_attr "fptype" "single")))
  "us1_fpa + us1_fp_single + us1_slotany, nothing*3")

(define_insn_reservation "us1_faddsub_double" 4
  (and (eq_attr "cpu" "ultrasparc")
       (and (eq_attr "type" "fp")
            (eq_attr "fptype" "double")))
  "us1_fpa + us1_fp_double + us1_slotany, nothing*3")

;; FP compares: us1_fpa, latency 1.
(define_insn_reservation "us1_fpcmp_single" 1
  (and (eq_attr "cpu" "ultrasparc")
       (and (eq_attr "type" "fpcmp")
            (eq_attr "fptype" "single")))
  "us1_fpa + us1_fp_single + us1_slotany")

(define_insn_reservation "us1_fpcmp_double" 1
  (and (eq_attr "cpu" "ultrasparc")
       (and (eq_attr "type" "fpcmp")
            (eq_attr "fptype" "double")))
  "us1_fpa + us1_fp_double + us1_slotany")

;; FP multiplies: us1_fpm in the issue cycle, latency 4.
(define_insn_reservation "us1_fmult_single" 4
  (and (eq_attr "cpu" "ultrasparc")
       (and (eq_attr "type" "fpmul")
            (eq_attr "fptype" "single")))
  "us1_fpm + us1_fp_single + us1_slotany, nothing*3")

(define_insn_reservation "us1_fmult_double" 4
  (and (eq_attr "cpu" "ultrasparc")
       (and (eq_attr "type" "fpmul")
            (eq_attr "fptype" "double")))
  "us1_fpm + us1_fp_double + us1_slotany, nothing*3")

;; In theory this bypass is dangerous: the chip can prematurely
;; dispatch the dependent instruction in the G stage, which results
;; in a 9 cycle stall.  The original author was never able to trigger
;; that case, even with hand written code, so it must require some
;; rare complicated pipeline state.
(define_bypass 3
  "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double"
  "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")

;; Floating point divide and square root use the multiplier unit
;; for final rounding 3 cycles before the divide/sqrt is complete.
;; Single-precision divide/sqrt, latency 13.  The divider is held for
;; 12 cycles; us1_fpm is taken in the issue cycle (together with
;; us1_slot0) and once more in the tenth cycle.
(define_insn_reservation "us1_fdivs" 13
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "fpdivs,fpsqrts"))
  "(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*8, (us1_fpm + us1_fdivider), us1_fdivider*2")

;; Results feed FP add/sub and multiply insns one cycle earlier.
(define_bypass 12
  "us1_fdivs"
  "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")

;; Double-precision divide/sqrt, latency 23.  The divider is held for
;; 22 cycles; us1_fpm is taken in the issue cycle (together with
;; us1_slot0) and once more in the twentieth cycle.
(define_insn_reservation "us1_fdivd" 23
  (and (eq_attr "cpu" "ultrasparc")
       (eq_attr "type" "fpdivd,fpsqrtd"))
  "(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*18, (us1_fpm + us1_fdivider), us1_fdivider*2")

;; Results feed FP add/sub and multiply insns one cycle earlier.
(define_bypass 22
  "us1_fdivd"
  "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")

;; A store may issue in the same group as the insn that produces the
;; data it stores, provided that producer is not an FPU div/sqrt.
;; The store_data_bypass_p guard is needed because the bypass does
;; not cover the address inputs of the store.
(define_bypass 0
  "us1_simple_ieuN,us1_simple_ieu1,us1_simple_ieu0,us1_faddsub_single,us1_faddsub_double,us1_fmov_single,us1_fmov_double,us1_fcmov_single,us1_fcmov_double,us1_fmult_single,us1_fmult_double"
  "us1_store"
  "store_data_bypass_p")

;; An integer branch may execute in the same cycle as the compare
;; that creates the condition codes.
(define_bypass 0 "us1_simple_ieu1" "us1_branch")