/* Scheduler hooks for IA-32 which implement Core 2/i7 specific logic.
2*38fd1498Szrj Copyright (C) 1988-2018 Free Software Foundation, Inc.
3*38fd1498Szrj
4*38fd1498Szrj This file is part of GCC.
5*38fd1498Szrj
6*38fd1498Szrj GCC is free software; you can redistribute it and/or modify
7*38fd1498Szrj it under the terms of the GNU General Public License as published by
8*38fd1498Szrj the Free Software Foundation; either version 3, or (at your option)
9*38fd1498Szrj any later version.
10*38fd1498Szrj
11*38fd1498Szrj GCC is distributed in the hope that it will be useful,
12*38fd1498Szrj but WITHOUT ANY WARRANTY; without even the implied warranty of
13*38fd1498Szrj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14*38fd1498Szrj GNU General Public License for more details.
15*38fd1498Szrj
16*38fd1498Szrj You should have received a copy of the GNU General Public License
17*38fd1498Szrj along with GCC; see the file COPYING3. If not see
18*38fd1498Szrj <http://www.gnu.org/licenses/>. */
19*38fd1498Szrj
20*38fd1498Szrj #define IN_TARGET_CODE 1
21*38fd1498Szrj
22*38fd1498Szrj #include "config.h"
23*38fd1498Szrj #include "system.h"
24*38fd1498Szrj #include "coretypes.h"
25*38fd1498Szrj #include "backend.h"
26*38fd1498Szrj #include "rtl.h"
27*38fd1498Szrj #include "tree.h"
28*38fd1498Szrj #include "cfghooks.h"
29*38fd1498Szrj #include "tm_p.h"
30*38fd1498Szrj #include "insn-config.h"
31*38fd1498Szrj #include "insn-attr.h"
32*38fd1498Szrj #include "recog.h"
33*38fd1498Szrj #include "target.h"
34*38fd1498Szrj #include "rtl-iter.h"
35*38fd1498Szrj #include "regset.h"
36*38fd1498Szrj #include "sched-int.h"
37*38fd1498Szrj
38*38fd1498Szrj
/* Model decoder of Core 2/i7.
   The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */
43*38fd1498Szrj
/* Longest insn, in bytes, that a secondary decoder unit can handle.
   ix86_core2i7_init_hooks sets this to 8 for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Size of the ifetch block, i.e. how many bytes the decoder reads per
   cycle.  ix86_core2i7_init_hooks sets this to 16 for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Upper bound on the number of insns the decoder can handle per cycle.
   ix86_core2i7_init_hooks sets this to 6 for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;
55*38fd1498Szrj
56*38fd1498Szrj typedef struct ix86_first_cycle_multipass_data_ *
57*38fd1498Szrj ix86_first_cycle_multipass_data_t;
58*38fd1498Szrj typedef const struct ix86_first_cycle_multipass_data_ *
59*38fd1498Szrj const_ix86_first_cycle_multipass_data_t;
60*38fd1498Szrj
61*38fd1498Szrj /* A variable to store target state across calls to max_issue within
62*38fd1498Szrj one cycle. */
63*38fd1498Szrj static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
64*38fd1498Szrj *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
65*38fd1498Szrj
66*38fd1498Szrj /* Initialize DATA. */
67*38fd1498Szrj static void
core2i7_first_cycle_multipass_init(void * _data)68*38fd1498Szrj core2i7_first_cycle_multipass_init (void *_data)
69*38fd1498Szrj {
70*38fd1498Szrj ix86_first_cycle_multipass_data_t data
71*38fd1498Szrj = (ix86_first_cycle_multipass_data_t) _data;
72*38fd1498Szrj
73*38fd1498Szrj data->ifetch_block_len = 0;
74*38fd1498Szrj data->ifetch_block_n_insns = 0;
75*38fd1498Szrj data->ready_try_change = NULL;
76*38fd1498Szrj data->ready_try_change_size = 0;
77*38fd1498Szrj }
78*38fd1498Szrj
79*38fd1498Szrj /* Advancing the cycle; reset ifetch block counts. */
80*38fd1498Szrj static void
core2i7_dfa_post_advance_cycle(void)81*38fd1498Szrj core2i7_dfa_post_advance_cycle (void)
82*38fd1498Szrj {
83*38fd1498Szrj ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
84*38fd1498Szrj
85*38fd1498Szrj gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
86*38fd1498Szrj
87*38fd1498Szrj data->ifetch_block_len = 0;
88*38fd1498Szrj data->ifetch_block_n_insns = 0;
89*38fd1498Szrj }
90*38fd1498Szrj
91*38fd1498Szrj /* Filter out insns from ready_try that the core will not be able to issue
92*38fd1498Szrj on current cycle due to decoder. */
93*38fd1498Szrj static void
core2i7_first_cycle_multipass_filter_ready_try(const_ix86_first_cycle_multipass_data_t data,signed char * ready_try,int n_ready,bool first_cycle_insn_p)94*38fd1498Szrj core2i7_first_cycle_multipass_filter_ready_try
95*38fd1498Szrj (const_ix86_first_cycle_multipass_data_t data,
96*38fd1498Szrj signed char *ready_try, int n_ready, bool first_cycle_insn_p)
97*38fd1498Szrj {
98*38fd1498Szrj while (n_ready--)
99*38fd1498Szrj {
100*38fd1498Szrj rtx_insn *insn;
101*38fd1498Szrj int insn_size;
102*38fd1498Szrj
103*38fd1498Szrj if (ready_try[n_ready])
104*38fd1498Szrj continue;
105*38fd1498Szrj
106*38fd1498Szrj insn = get_ready_element (n_ready);
107*38fd1498Szrj insn_size = ix86_min_insn_size (insn);
108*38fd1498Szrj
109*38fd1498Szrj if (/* If this is a too long an insn for a secondary decoder ... */
110*38fd1498Szrj (!first_cycle_insn_p
111*38fd1498Szrj && insn_size > core2i7_secondary_decoder_max_insn_size)
112*38fd1498Szrj /* ... or it would not fit into the ifetch block ... */
113*38fd1498Szrj || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
114*38fd1498Szrj /* ... or the decoder is full already ... */
115*38fd1498Szrj || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
116*38fd1498Szrj /* ... mask the insn out. */
117*38fd1498Szrj {
118*38fd1498Szrj ready_try[n_ready] = 1;
119*38fd1498Szrj
120*38fd1498Szrj if (data->ready_try_change)
121*38fd1498Szrj bitmap_set_bit (data->ready_try_change, n_ready);
122*38fd1498Szrj }
123*38fd1498Szrj }
124*38fd1498Szrj }
125*38fd1498Szrj
126*38fd1498Szrj /* Prepare for a new round of multipass lookahead scheduling. */
127*38fd1498Szrj static void
core2i7_first_cycle_multipass_begin(void * _data,signed char * ready_try,int n_ready,bool first_cycle_insn_p)128*38fd1498Szrj core2i7_first_cycle_multipass_begin (void *_data,
129*38fd1498Szrj signed char *ready_try, int n_ready,
130*38fd1498Szrj bool first_cycle_insn_p)
131*38fd1498Szrj {
132*38fd1498Szrj ix86_first_cycle_multipass_data_t data
133*38fd1498Szrj = (ix86_first_cycle_multipass_data_t) _data;
134*38fd1498Szrj const_ix86_first_cycle_multipass_data_t prev_data
135*38fd1498Szrj = ix86_first_cycle_multipass_data;
136*38fd1498Szrj
137*38fd1498Szrj /* Restore the state from the end of the previous round. */
138*38fd1498Szrj data->ifetch_block_len = prev_data->ifetch_block_len;
139*38fd1498Szrj data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
140*38fd1498Szrj
141*38fd1498Szrj /* Filter instructions that cannot be issued on current cycle due to
142*38fd1498Szrj decoder restrictions. */
143*38fd1498Szrj core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
144*38fd1498Szrj first_cycle_insn_p);
145*38fd1498Szrj }
146*38fd1498Szrj
147*38fd1498Szrj /* INSN is being issued in current solution. Account for its impact on
148*38fd1498Szrj the decoder model. */
149*38fd1498Szrj static void
core2i7_first_cycle_multipass_issue(void * _data,signed char * ready_try,int n_ready,rtx_insn * insn,const void * _prev_data)150*38fd1498Szrj core2i7_first_cycle_multipass_issue (void *_data,
151*38fd1498Szrj signed char *ready_try, int n_ready,
152*38fd1498Szrj rtx_insn *insn, const void *_prev_data)
153*38fd1498Szrj {
154*38fd1498Szrj ix86_first_cycle_multipass_data_t data
155*38fd1498Szrj = (ix86_first_cycle_multipass_data_t) _data;
156*38fd1498Szrj const_ix86_first_cycle_multipass_data_t prev_data
157*38fd1498Szrj = (const_ix86_first_cycle_multipass_data_t) _prev_data;
158*38fd1498Szrj
159*38fd1498Szrj int insn_size = ix86_min_insn_size (insn);
160*38fd1498Szrj
161*38fd1498Szrj data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
162*38fd1498Szrj data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
163*38fd1498Szrj gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
164*38fd1498Szrj && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
165*38fd1498Szrj
166*38fd1498Szrj /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
167*38fd1498Szrj if (!data->ready_try_change)
168*38fd1498Szrj {
169*38fd1498Szrj data->ready_try_change = sbitmap_alloc (n_ready);
170*38fd1498Szrj data->ready_try_change_size = n_ready;
171*38fd1498Szrj }
172*38fd1498Szrj else if (data->ready_try_change_size < n_ready)
173*38fd1498Szrj {
174*38fd1498Szrj data->ready_try_change = sbitmap_resize (data->ready_try_change,
175*38fd1498Szrj n_ready, 0);
176*38fd1498Szrj data->ready_try_change_size = n_ready;
177*38fd1498Szrj }
178*38fd1498Szrj bitmap_clear (data->ready_try_change);
179*38fd1498Szrj
180*38fd1498Szrj /* Filter out insns from ready_try that the core will not be able to issue
181*38fd1498Szrj on current cycle due to decoder. */
182*38fd1498Szrj core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
183*38fd1498Szrj false);
184*38fd1498Szrj }
185*38fd1498Szrj
186*38fd1498Szrj /* Revert the effect on ready_try. */
187*38fd1498Szrj static void
core2i7_first_cycle_multipass_backtrack(const void * _data,signed char * ready_try,int n_ready ATTRIBUTE_UNUSED)188*38fd1498Szrj core2i7_first_cycle_multipass_backtrack (const void *_data,
189*38fd1498Szrj signed char *ready_try,
190*38fd1498Szrj int n_ready ATTRIBUTE_UNUSED)
191*38fd1498Szrj {
192*38fd1498Szrj const_ix86_first_cycle_multipass_data_t data
193*38fd1498Szrj = (const_ix86_first_cycle_multipass_data_t) _data;
194*38fd1498Szrj unsigned int i = 0;
195*38fd1498Szrj sbitmap_iterator sbi;
196*38fd1498Szrj
197*38fd1498Szrj gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
198*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
199*38fd1498Szrj {
200*38fd1498Szrj ready_try[i] = 0;
201*38fd1498Szrj }
202*38fd1498Szrj }
203*38fd1498Szrj
204*38fd1498Szrj /* Save the result of multipass lookahead scheduling for the next round. */
205*38fd1498Szrj static void
core2i7_first_cycle_multipass_end(const void * _data)206*38fd1498Szrj core2i7_first_cycle_multipass_end (const void *_data)
207*38fd1498Szrj {
208*38fd1498Szrj const_ix86_first_cycle_multipass_data_t data
209*38fd1498Szrj = (const_ix86_first_cycle_multipass_data_t) _data;
210*38fd1498Szrj ix86_first_cycle_multipass_data_t next_data
211*38fd1498Szrj = ix86_first_cycle_multipass_data;
212*38fd1498Szrj
213*38fd1498Szrj if (data != NULL)
214*38fd1498Szrj {
215*38fd1498Szrj next_data->ifetch_block_len = data->ifetch_block_len;
216*38fd1498Szrj next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
217*38fd1498Szrj }
218*38fd1498Szrj }
219*38fd1498Szrj
220*38fd1498Szrj /* Deallocate target data. */
221*38fd1498Szrj static void
core2i7_first_cycle_multipass_fini(void * _data)222*38fd1498Szrj core2i7_first_cycle_multipass_fini (void *_data)
223*38fd1498Szrj {
224*38fd1498Szrj ix86_first_cycle_multipass_data_t data
225*38fd1498Szrj = (ix86_first_cycle_multipass_data_t) _data;
226*38fd1498Szrj
227*38fd1498Szrj if (data->ready_try_change)
228*38fd1498Szrj {
229*38fd1498Szrj sbitmap_free (data->ready_try_change);
230*38fd1498Szrj data->ready_try_change = NULL;
231*38fd1498Szrj data->ready_try_change_size = 0;
232*38fd1498Szrj }
233*38fd1498Szrj }
234*38fd1498Szrj
235*38fd1498Szrj void
ix86_core2i7_init_hooks(void)236*38fd1498Szrj ix86_core2i7_init_hooks (void)
237*38fd1498Szrj {
238*38fd1498Szrj targetm.sched.dfa_post_advance_cycle
239*38fd1498Szrj = core2i7_dfa_post_advance_cycle;
240*38fd1498Szrj targetm.sched.first_cycle_multipass_init
241*38fd1498Szrj = core2i7_first_cycle_multipass_init;
242*38fd1498Szrj targetm.sched.first_cycle_multipass_begin
243*38fd1498Szrj = core2i7_first_cycle_multipass_begin;
244*38fd1498Szrj targetm.sched.first_cycle_multipass_issue
245*38fd1498Szrj = core2i7_first_cycle_multipass_issue;
246*38fd1498Szrj targetm.sched.first_cycle_multipass_backtrack
247*38fd1498Szrj = core2i7_first_cycle_multipass_backtrack;
248*38fd1498Szrj targetm.sched.first_cycle_multipass_end
249*38fd1498Szrj = core2i7_first_cycle_multipass_end;
250*38fd1498Szrj targetm.sched.first_cycle_multipass_fini
251*38fd1498Szrj = core2i7_first_cycle_multipass_fini;
252*38fd1498Szrj
253*38fd1498Szrj /* Set decoder parameters. */
254*38fd1498Szrj core2i7_secondary_decoder_max_insn_size = 8;
255*38fd1498Szrj core2i7_ifetch_block_size = 16;
256*38fd1498Szrj core2i7_ifetch_block_max_insns = 6;
257*38fd1498Szrj }
258