xref: /openbsd-src/gnu/usr.bin/gcc/gcc/config/sparc/ultra1_2.md (revision c87b03e512fc05ed6e0222f6fb0ae86264b1d05b)
1;; Scheduling description for UltraSPARC-I/II.
2;;   Copyright (C) 2002 Free Software Foundation, Inc.
3;;
4;; This file is part of GNU CC.
5;;
6;; GNU CC is free software; you can redistribute it and/or modify
7;; it under the terms of the GNU General Public License as published by
8;; the Free Software Foundation; either version 2, or (at your option)
9;; any later version.
10;;
11;; GNU CC is distributed in the hope that it will be useful,
12;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14;; GNU General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
17;; along with GNU CC; see the file COPYING.  If not, write to
18;; the Free Software Foundation, 59 Temple Place - Suite 330,
19;; Boston, MA 02111-1307, USA.
20
21;; UltraSPARC-I and II are quad-issue processors.  Interesting features
22;; to note:
23;;
24;; - Buffered loads: they can queue waiting for the actual data until
25;;   an instruction actually tries to reference the destination register
26;;   as an input
27;; - Two integer units.  Only one of them can do shifts, and the other
28;;   is the only one which may do condition code setting instructions.
29;;   Complicating things further, a shift may go only into the first
30;;   slot in a dispatched group.  And if you have a non-condition code
31;;   setting instruction and one that does set the condition codes, the
32;;   former must be issued first in order for both of them to issue.
33;; - Stores can issue before the value being stored is available.  As long
34;;   as the input data becomes ready before the store is to move out of the
35;;   store buffer, it will not cause a stall.
36;; - Branches may issue in the same cycle as an instruction setting the
37;;   condition codes being tested by that branch.  This does not apply
38;;   to floating point, only integer.
39
;; Two automata are declared; every unit below is assigned to exactly
;; one of them.
40(define_automaton "ultrasparc_0,ultrasparc_1")
41
;; ultrasparc_0 holds only the FP divider and FP multiplier units
;; (presumably so the long divide/sqrt reservations do not inflate the
;; main automaton -- TODO confirm).  The ';' after the closing
;; parenthesis merely opens an empty comment and is harmless.
42(define_cpu_unit "us1_fdivider,us1_fpm" "ultrasparc_0");
;; ultrasparc_1 holds everything else:
;;   us1_fpa, us1_load_writeback - used by FP add/move/compare insns and
;;                                 by the load write-back stage
;;   us1_fps_*, us1_fpd_*        - single/double precision markers
;;   us1_slot0..3                - the four issue slots of a group
;;   us1_ieu0/1, us1_cti, us1_lsu - integer units, control transfer,
;;                                 load/store unit
43(define_cpu_unit "us1_fpa,us1_load_writeback" "ultrasparc_1")
44(define_cpu_unit "us1_fps_0,us1_fps_1,us1_fpd_0,us1_fpd_1" "ultrasparc_1")
45(define_cpu_unit "us1_slot0,us1_slot1,us1_slot2,us1_slot3" "ultrasparc_1")
46(define_cpu_unit "us1_ieu0,us1_ieu1,us1_cti,us1_lsu" "ultrasparc_1")
47
;; Issue-slot shorthands.  "|" selects one alternative, "+" reserves
;; all listed units in the same cycle.
;;   us1_slot012      - any one of the first three slots
;;   us1_slotany      - any one of the four slots
;;   us1_single_issue - all four slots at once, so no other insn can
;;                      take a slot in that cycle
48(define_reservation "us1_slot012" "(us1_slot0 | us1_slot1 | us1_slot2)")
49(define_reservation "us1_slotany" "(us1_slot0 | us1_slot1 | us1_slot2 | us1_slot3)")
50(define_reservation "us1_single_issue" "us1_slot0 + us1_slot1 + us1_slot2 + us1_slot3")
51
;; Precision markers.  Each FP insn reservation below adds us1_fp_single
;; or us1_fp_double according to the insn's "fptype" attribute; either
;; of the two units of the matching precision may be picked.
52(define_reservation "us1_fp_single" "(us1_fps_0 | us1_fps_1)")
53(define_reservation "us1_fp_double" "(us1_fpd_0 | us1_fpd_1)")
54
55;; This is a simplified representation of the issue at hand.
56;; For most cases, going from one FP precision type insn to another
57;; just breaks up the insn group.  However for some cases, such
58;; a situation causes the second insn to stall 2 more cycles.
;; The exclusion set below models only the group break: no us1_fps_*
;; unit may be reserved in the same cycle as any us1_fpd_* unit, so
;; single and double precision insns never share a cycle.
59(exclusion_set "us1_fps_0,us1_fps_1" "us1_fpd_0,us1_fpd_1")
60
61;; If we have to schedule an ieu1 specific instruction and we want
62;; to reserve the ieu0 unit as well, we must reserve it first.  So for
63;; example we could not schedule this sequence:
64;;	COMPARE		IEU1
65;;	IALU		IEU0
66;; but we could schedule them together like this:
67;;	IALU		IEU0
68;;	COMPARE		IEU1
69;; This basically requires that ieu0 is reserved before ieu1 when
70;; it is required that both be reserved.
;; Expressed as an absence set: us1_ieu0 may be reserved only while
;; us1_ieu1 is not already reserved in the same cycle.
71(absence_set "us1_ieu0" "us1_ieu1")
72
73;; This defines the slotting order.  Most IEU instructions can only
74;; execute in the first three slots, FPU and branches can go into
75;; any slot.  We represent instructions which "break the group"
76;; as requiring reservation of us1_slot0.
;; Each absence set lets a slot be reserved only while no
;; higher-numbered slot is reserved, so within a cycle the slots are
;; filled in increasing order.  An insn that names us1_slot0 explicitly
;; can therefore only be the first insn of its group.
77(absence_set "us1_slot0" "us1_slot1,us1_slot2,us1_slot3")
78(absence_set "us1_slot1" "us1_slot2,us1_slot3")
79(absence_set "us1_slot2" "us1_slot3")
80
;; Insns of type multi, flushw, iflush and trap issue alone: they take
;; all four slots (us1_single_issue) for one cycle.  Default latency 1.
81(define_insn_reservation "us1_single" 1
82  (and (eq_attr "cpu" "ultrasparc")
83    (eq_attr "type" "multi,flushw,iflush,trap"))
84  "us1_single_issue")
85
;; Plain integer ALU insns (type "ialu") may use either integer unit
;; together with one of the first three slots.  Default latency 1.
86(define_insn_reservation "us1_simple_ieuN" 1
87  (and (eq_attr "cpu" "ultrasparc")
88    (eq_attr "type" "ialu"))
89  "(us1_ieu0 | us1_ieu1) + us1_slot012")
90
;; Shifts require us1_ieu0 specifically, plus one of the first three
;; slots.  Default latency 1.
;; NOTE(review): the file header says a shift may only go into the
;; first slot of a group, yet this reservation accepts slots 0-2 --
;; confirm whether that is a deliberate simplification.
91(define_insn_reservation "us1_simple_ieu0" 1
92  (and (eq_attr "cpu" "ultrasparc")
93    (eq_attr "type" "shift"))
94  "us1_ieu0 + us1_slot012")
95
;; Compares (the condition-code setters modelled here) require
;; us1_ieu1 specifically, plus one of the first three slots.  Default
;; latency 1; a bypass later in this file lowers it to 0 for branches.
96(define_insn_reservation "us1_simple_ieu1" 1
97  (and (eq_attr "cpu" "ultrasparc")
98    (eq_attr "type" "compare"))
99  "us1_ieu1 + us1_slot012")
100
;; Insns of type "ialuX" issue alone, taking all four slots for one
;; cycle.  Default latency 1.
101(define_insn_reservation "us1_ialuX" 1
102  (and (eq_attr "cpu" "ultrasparc")
103    (eq_attr "type" "ialuX"))
104  "us1_single_issue")
105
;; Integer conditional moves issue alone (all four slots) and reserve
;; nothing in the following cycle.  Default latency 2.
106(define_insn_reservation "us1_cmove" 2
107  (and (eq_attr "cpu" "ultrasparc")
108    (eq_attr "type" "cmove"))
109  "us1_single_issue, nothing")
110
;; Integer multiplies issue alone, taking all four slots for one cycle.
;; NOTE(review): a default latency of 1 looks optimistic for a
;; multiply -- confirm whether this is a deliberate simplification.
111(define_insn_reservation "us1_imul" 1
112  (and (eq_attr "cpu" "ultrasparc")
113    (eq_attr "type" "imul"))
114  "us1_single_issue")
115
;; Integer divides issue alone, taking all four slots for one cycle.
;; NOTE(review): a default latency of 1 looks optimistic for a
;; divide -- confirm whether this is a deliberate simplification.
116(define_insn_reservation "us1_idiv" 1
117  (and (eq_attr "cpu" "ultrasparc")
118    (eq_attr "type" "idiv"))
119  "us1_single_issue")
120
121;; For loads, the "delayed return mode" behavior of the chip
122;; is represented using the us1_load_writeback resource.
;; Integer and FP loads: the issue cycle takes us1_lsu and one of the
;; first three slots; the next cycle takes us1_load_writeback.
;; Default latency 2.
123(define_insn_reservation "us1_load" 2
124  (and (eq_attr "cpu" "ultrasparc")
125    (eq_attr "type" "load,fpload"))
126  "us1_lsu + us1_slot012, us1_load_writeback")
127
;; Loads of type "sload": same issue requirements as us1_load, but one
;; idle cycle separates issue from the us1_load_writeback reservation.
;; Default latency 3.
128(define_insn_reservation "us1_load_signed" 3
129  (and (eq_attr "cpu" "ultrasparc")
130    (eq_attr "type" "sload"))
131  "us1_lsu + us1_slot012, nothing, us1_load_writeback")
132
;; Integer and FP stores take us1_lsu and one of the first three slots
;; for one cycle.  Default latency 1.  This reservation is the consumer
;; named by the store-data bypass later in this file.
133(define_insn_reservation "us1_store" 1
134  (and (eq_attr "cpu" "ultrasparc")
135    (eq_attr "type" "store,fpstore"))
136  "us1_lsu + us1_slot012")
137
;; Branches of type "branch" take the control-transfer unit and any
;; one of the four slots.  Default latency 1.
138(define_insn_reservation "us1_branch" 1
139  (and (eq_attr "cpu" "ultrasparc")
140    (eq_attr "type" "branch"))
141  "us1_cti + us1_slotany")
142
;; Calls, sibcalls and unconditional branches take us1_cti and
;; us1_ieu1, and name us1_slot0 explicitly.  Because us1_slot0 may only
;; be reserved while slots 1-3 are free, such an insn can only be the
;; first one issued in its cycle.  Default latency 1.
143(define_insn_reservation "us1_call_jmpl" 1
144  (and (eq_attr "cpu" "ultrasparc")
145    (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch"))
146  "us1_cti + us1_ieu1 + us1_slot0")
147
;; Single-precision FP moves: us1_fpa, a single-precision marker unit
;; and any slot, for one cycle.  Default latency 1.
148(define_insn_reservation "us1_fmov_single" 1
149  (and (and (eq_attr "cpu" "ultrasparc")
150            (eq_attr "type" "fpmove"))
151       (eq_attr "fptype" "single"))
152  "us1_fpa + us1_fp_single + us1_slotany")
153
;; Double-precision FP moves: us1_fpa, a double-precision marker unit
;; and any slot, for one cycle.  Default latency 1.
154(define_insn_reservation "us1_fmov_double" 1
155  (and (and (eq_attr "cpu" "ultrasparc")
156            (eq_attr "type" "fpmove"))
157       (eq_attr "fptype" "double"))
158  "us1_fpa + us1_fp_double + us1_slotany")
159
;; Single-precision FP conditional moves (types fpcmove and fpcrmove):
;; same issue-cycle units as an FP move, then one cycle with nothing
;; reserved.  Default latency 2.
160(define_insn_reservation "us1_fcmov_single" 2
161  (and (and (eq_attr "cpu" "ultrasparc")
162            (eq_attr "type" "fpcmove,fpcrmove"))
163       (eq_attr "fptype" "single"))
164  "us1_fpa + us1_fp_single + us1_slotany, nothing")
165
;; Double-precision FP conditional moves (types fpcmove and fpcrmove):
;; same issue-cycle units as an FP move, then one cycle with nothing
;; reserved.  Default latency 2.
166(define_insn_reservation "us1_fcmov_double" 2
167  (and (and (eq_attr "cpu" "ultrasparc")
168            (eq_attr "type" "fpcmove,fpcrmove"))
169       (eq_attr "fptype" "double"))
170  "us1_fpa + us1_fp_double + us1_slotany, nothing")
171
;; Single-precision insns of type "fp" (named add/sub here): us1_fpa, a
;; single-precision marker and any slot in the issue cycle, then three
;; cycles with nothing reserved, so the unit is pipelined.  Default
;; latency 4; a bypass later in this file lowers it to 3 for FP
;; add/sub/multiply consumers.
172(define_insn_reservation "us1_faddsub_single" 4
173  (and (and (eq_attr "cpu" "ultrasparc")
174            (eq_attr "type" "fp"))
175       (eq_attr "fptype" "single"))
176  "us1_fpa + us1_fp_single + us1_slotany, nothing*3")
177
;; Double-precision insns of type "fp" (named add/sub here): us1_fpa, a
;; double-precision marker and any slot in the issue cycle, then three
;; cycles with nothing reserved.  Default latency 4; a bypass later in
;; this file lowers it to 3 for FP add/sub/multiply consumers.
178(define_insn_reservation "us1_faddsub_double" 4
179  (and (and (eq_attr "cpu" "ultrasparc")
180            (eq_attr "type" "fp"))
181       (eq_attr "fptype" "double"))
182  "us1_fpa + us1_fp_double + us1_slotany, nothing*3")
183
;; Single-precision FP compares: us1_fpa, a single-precision marker and
;; any slot, for one cycle.  Default latency 1.  No bypass in this file
;; names this reservation.
184(define_insn_reservation "us1_fpcmp_single" 1
185  (and (and (eq_attr "cpu" "ultrasparc")
186            (eq_attr "type" "fpcmp"))
187       (eq_attr "fptype" "single"))
188  "us1_fpa + us1_fp_single + us1_slotany")
189
;; Double-precision FP compares: us1_fpa, a double-precision marker and
;; any slot, for one cycle.  Default latency 1.  No bypass in this file
;; names this reservation.
190(define_insn_reservation "us1_fpcmp_double" 1
191  (and (and (eq_attr "cpu" "ultrasparc")
192            (eq_attr "type" "fpcmp"))
193       (eq_attr "fptype" "double"))
194  "us1_fpa + us1_fp_double + us1_slotany")
195
;; Single-precision FP multiplies: us1_fpm (not us1_fpa), a
;; single-precision marker and any slot in the issue cycle, then three
;; cycles with nothing reserved.  Default latency 4; a bypass later in
;; this file lowers it to 3 for FP add/sub/multiply consumers.
196(define_insn_reservation "us1_fmult_single" 4
197  (and (and (eq_attr "cpu" "ultrasparc")
198            (eq_attr "type" "fpmul"))
199       (eq_attr "fptype" "single"))
200  "us1_fpm + us1_fp_single + us1_slotany, nothing*3")
201
;; Double-precision FP multiplies: us1_fpm (not us1_fpa), a
;; double-precision marker and any slot in the issue cycle, then three
;; cycles with nothing reserved.  Default latency 4; a bypass later in
;; this file lowers it to 3 for FP add/sub/multiply consumers.
202(define_insn_reservation "us1_fmult_double" 4
203  (and (and (eq_attr "cpu" "ultrasparc")
204            (eq_attr "type" "fpmul"))
205       (eq_attr "fptype" "double"))
206  "us1_fpm + us1_fp_double + us1_slotany, nothing*3")
207
;; FP add/sub or multiply feeding another FP add/sub or multiply: the
;; producer's latency drops from the default 4 to 3.
208;; This is actually in theory dangerous, because it is possible
209;; for the chip to prematurely dispatch the dependent instruction
210;; in the G stage, resulting in a 9 cycle stall.  However I have never
211;; been able to trigger this case myself even with hand written code,
212;; so it must require some rare complicated pipeline state.
213(define_bypass 3
214   "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double"
215   "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")
216
217;; Floating point divide and square root use the multiplier unit
218;; for final rounding 3 cycles before the divide/sqrt is complete.
219
;; Single-precision divide/sqrt.  Default latency 13.  The reservation
;; spans 12 cycles:
;;   cycle 1      us1_fpm + us1_fdivider + us1_slot0 (slot0 is named
;;                explicitly, so the insn must lead its issue group)
;;   cycles 2-9   us1_fdivider only
;;   cycle 10     us1_fpm + us1_fdivider (the final rounding step)
;;   cycles 11-12 us1_fdivider only
;; No precision marker unit is reserved here.
220(define_insn_reservation "us1_fdivs"
221  13
222  (and (eq_attr "cpu" "ultrasparc")
223    (eq_attr "type" "fpdivs,fpsqrts"))
224  "(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*8, (us1_fpm + us1_fdivider), us1_fdivider*2"
225  )
226
;; A single-precision divide/sqrt result feeding an FP add/sub or
;; multiply is available after 12 cycles instead of the default 13.
227(define_bypass
228  12
229  "us1_fdivs"
230  "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")
231
;; Double-precision divide/sqrt.  Default latency 23.  The reservation
;; spans 22 cycles:
;;   cycle 1      us1_fpm + us1_fdivider + us1_slot0 (slot0 is named
;;                explicitly, so the insn must lead its issue group)
;;   cycles 2-19  us1_fdivider only
;;   cycle 20     us1_fpm + us1_fdivider (the final rounding step)
;;   cycles 21-22 us1_fdivider only
;; No precision marker unit is reserved here.
232(define_insn_reservation "us1_fdivd"
233  23
234  (and (eq_attr "cpu" "ultrasparc")
235    (eq_attr "type" "fpdivd,fpsqrtd"))
236  "(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*18, (us1_fpm + us1_fdivider), us1_fdivider*2"
237  )
;; A double-precision divide/sqrt result feeding an FP add/sub or
;; multiply is available after 22 cycles instead of the default 23.
238(define_bypass
239  22
240  "us1_fdivd"
241  "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")
242
243;; Any store may multi issue with the insn creating the source
244;; data as long as that creating insn is not an FPU div/sqrt.
245;; We need a special guard function because this bypass does
246;; not apply to the address inputs of the store.
;; Latency 0 from the listed producers (simple integer ops, FP
;; add/sub, FP moves and conditional moves, FP multiplies) to
;; us1_store.  Loads, FP compares and the single-issue reservations are
;; not listed as producers.  The guard store_data_bypass_p is not
;; defined in this file.
247(define_bypass 0 "us1_simple_ieuN,us1_simple_ieu1,us1_simple_ieu0,us1_faddsub_single,us1_faddsub_double,us1_fmov_single,us1_fmov_double,us1_fcmov_single,us1_fcmov_double,us1_fmult_single,us1_fmult_double" "us1_store"
248   "store_data_bypass_p")
249
250;; An integer branch may execute in the same cycle as the compare
251;; creating the condition codes.
;; Latency 0 from us1_simple_ieu1 (type "compare") to us1_branch; no
;; other producer is listed and no guard function is used.
252(define_bypass 0 "us1_simple_ieu1" "us1_branch")
253