xref: /dflybsd-src/contrib/gcc-8.0/gcc/config/i386/x86-tune-sched-core.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* Scheduler hooks for IA-32 which implement Core 2/i7 specific logic.
2*38fd1498Szrj    Copyright (C) 1988-2018 Free Software Foundation, Inc.
3*38fd1498Szrj 
4*38fd1498Szrj This file is part of GCC.
5*38fd1498Szrj 
6*38fd1498Szrj GCC is free software; you can redistribute it and/or modify
7*38fd1498Szrj it under the terms of the GNU General Public License as published by
8*38fd1498Szrj the Free Software Foundation; either version 3, or (at your option)
9*38fd1498Szrj any later version.
10*38fd1498Szrj 
11*38fd1498Szrj GCC is distributed in the hope that it will be useful,
12*38fd1498Szrj but WITHOUT ANY WARRANTY; without even the implied warranty of
13*38fd1498Szrj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14*38fd1498Szrj GNU General Public License for more details.
15*38fd1498Szrj 
16*38fd1498Szrj You should have received a copy of the GNU General Public License
17*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
18*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
19*38fd1498Szrj 
20*38fd1498Szrj #define IN_TARGET_CODE 1
21*38fd1498Szrj 
22*38fd1498Szrj #include "config.h"
23*38fd1498Szrj #include "system.h"
24*38fd1498Szrj #include "coretypes.h"
25*38fd1498Szrj #include "backend.h"
26*38fd1498Szrj #include "rtl.h"
27*38fd1498Szrj #include "tree.h"
28*38fd1498Szrj #include "cfghooks.h"
29*38fd1498Szrj #include "tm_p.h"
30*38fd1498Szrj #include "insn-config.h"
31*38fd1498Szrj #include "insn-attr.h"
32*38fd1498Szrj #include "recog.h"
33*38fd1498Szrj #include "target.h"
34*38fd1498Szrj #include "rtl-iter.h"
35*38fd1498Szrj #include "regset.h"
36*38fd1498Szrj #include "sched-int.h"
37*38fd1498Szrj 
38*38fd1498Szrj 
39*38fd1498Szrj /* Model the decoder of Core 2/i7.
40*38fd1498Szrj    The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
41*38fd1498Szrj    track the instruction fetch block boundaries and make sure that long
42*38fd1498Szrj    (9+ bytes) instructions are assigned to D0.  */
43*38fd1498Szrj 
44*38fd1498Szrj /* Maximum length of an insn that can be handled by
45*38fd1498Szrj    a secondary decoder unit.  '8' for Core 2/i7.  */
46*38fd1498Szrj static int core2i7_secondary_decoder_max_insn_size;
47*38fd1498Szrj 
48*38fd1498Szrj /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
49*38fd1498Szrj    '16' for Core 2/i7.  */
50*38fd1498Szrj static int core2i7_ifetch_block_size;
51*38fd1498Szrj 
52*38fd1498Szrj /* Maximum number of instructions decoder can handle per cycle.
53*38fd1498Szrj    '6' for Core 2/i7.  */
54*38fd1498Szrj static int core2i7_ifetch_block_max_insns;
55*38fd1498Szrj 
56*38fd1498Szrj typedef struct ix86_first_cycle_multipass_data_ *
57*38fd1498Szrj   ix86_first_cycle_multipass_data_t;
58*38fd1498Szrj typedef const struct ix86_first_cycle_multipass_data_ *
59*38fd1498Szrj   const_ix86_first_cycle_multipass_data_t;
60*38fd1498Szrj 
61*38fd1498Szrj /* A variable to store target state across calls to max_issue within
62*38fd1498Szrj    one cycle.  */
63*38fd1498Szrj static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
64*38fd1498Szrj   *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
65*38fd1498Szrj 
66*38fd1498Szrj /* Initialize DATA.  */
67*38fd1498Szrj static void
core2i7_first_cycle_multipass_init(void * _data)68*38fd1498Szrj core2i7_first_cycle_multipass_init (void *_data)
69*38fd1498Szrj {
70*38fd1498Szrj   ix86_first_cycle_multipass_data_t data
71*38fd1498Szrj     = (ix86_first_cycle_multipass_data_t) _data;
72*38fd1498Szrj 
73*38fd1498Szrj   data->ifetch_block_len = 0;
74*38fd1498Szrj   data->ifetch_block_n_insns = 0;
75*38fd1498Szrj   data->ready_try_change = NULL;
76*38fd1498Szrj   data->ready_try_change_size = 0;
77*38fd1498Szrj }
78*38fd1498Szrj 
79*38fd1498Szrj /* Advancing the cycle; reset ifetch block counts.  */
80*38fd1498Szrj static void
core2i7_dfa_post_advance_cycle(void)81*38fd1498Szrj core2i7_dfa_post_advance_cycle (void)
82*38fd1498Szrj {
83*38fd1498Szrj   ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
84*38fd1498Szrj 
85*38fd1498Szrj   gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
86*38fd1498Szrj 
87*38fd1498Szrj   data->ifetch_block_len = 0;
88*38fd1498Szrj   data->ifetch_block_n_insns = 0;
89*38fd1498Szrj }
90*38fd1498Szrj 
91*38fd1498Szrj /* Filter out insns from ready_try that the core will not be able to issue
92*38fd1498Szrj    on current cycle due to decoder.  */
93*38fd1498Szrj static void
core2i7_first_cycle_multipass_filter_ready_try(const_ix86_first_cycle_multipass_data_t data,signed char * ready_try,int n_ready,bool first_cycle_insn_p)94*38fd1498Szrj core2i7_first_cycle_multipass_filter_ready_try
95*38fd1498Szrj (const_ix86_first_cycle_multipass_data_t data,
96*38fd1498Szrj  signed char *ready_try, int n_ready, bool first_cycle_insn_p)
97*38fd1498Szrj {
98*38fd1498Szrj   while (n_ready--)
99*38fd1498Szrj     {
100*38fd1498Szrj       rtx_insn *insn;
101*38fd1498Szrj       int insn_size;
102*38fd1498Szrj 
103*38fd1498Szrj       if (ready_try[n_ready])
104*38fd1498Szrj 	continue;
105*38fd1498Szrj 
106*38fd1498Szrj       insn = get_ready_element (n_ready);
107*38fd1498Szrj       insn_size = ix86_min_insn_size (insn);
108*38fd1498Szrj 
109*38fd1498Szrj       if (/* If this is a too long an insn for a secondary decoder ...  */
110*38fd1498Szrj 	  (!first_cycle_insn_p
111*38fd1498Szrj 	   && insn_size > core2i7_secondary_decoder_max_insn_size)
112*38fd1498Szrj 	  /* ... or it would not fit into the ifetch block ...  */
113*38fd1498Szrj 	  || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
114*38fd1498Szrj 	  /* ... or the decoder is full already ...  */
115*38fd1498Szrj 	  || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
116*38fd1498Szrj 	/* ... mask the insn out.  */
117*38fd1498Szrj 	{
118*38fd1498Szrj 	  ready_try[n_ready] = 1;
119*38fd1498Szrj 
120*38fd1498Szrj 	  if (data->ready_try_change)
121*38fd1498Szrj 	    bitmap_set_bit (data->ready_try_change, n_ready);
122*38fd1498Szrj 	}
123*38fd1498Szrj     }
124*38fd1498Szrj }
125*38fd1498Szrj 
126*38fd1498Szrj /* Prepare for a new round of multipass lookahead scheduling.  */
127*38fd1498Szrj static void
core2i7_first_cycle_multipass_begin(void * _data,signed char * ready_try,int n_ready,bool first_cycle_insn_p)128*38fd1498Szrj core2i7_first_cycle_multipass_begin (void *_data,
129*38fd1498Szrj 				     signed char *ready_try, int n_ready,
130*38fd1498Szrj 				     bool first_cycle_insn_p)
131*38fd1498Szrj {
132*38fd1498Szrj   ix86_first_cycle_multipass_data_t data
133*38fd1498Szrj     = (ix86_first_cycle_multipass_data_t) _data;
134*38fd1498Szrj   const_ix86_first_cycle_multipass_data_t prev_data
135*38fd1498Szrj     = ix86_first_cycle_multipass_data;
136*38fd1498Szrj 
137*38fd1498Szrj   /* Restore the state from the end of the previous round.  */
138*38fd1498Szrj   data->ifetch_block_len = prev_data->ifetch_block_len;
139*38fd1498Szrj   data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
140*38fd1498Szrj 
141*38fd1498Szrj   /* Filter instructions that cannot be issued on current cycle due to
142*38fd1498Szrj      decoder restrictions.  */
143*38fd1498Szrj   core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
144*38fd1498Szrj 						  first_cycle_insn_p);
145*38fd1498Szrj }
146*38fd1498Szrj 
147*38fd1498Szrj /* INSN is being issued in current solution.  Account for its impact on
148*38fd1498Szrj    the decoder model.  */
149*38fd1498Szrj static void
core2i7_first_cycle_multipass_issue(void * _data,signed char * ready_try,int n_ready,rtx_insn * insn,const void * _prev_data)150*38fd1498Szrj core2i7_first_cycle_multipass_issue (void *_data,
151*38fd1498Szrj 				     signed char *ready_try, int n_ready,
152*38fd1498Szrj 				     rtx_insn *insn, const void *_prev_data)
153*38fd1498Szrj {
154*38fd1498Szrj   ix86_first_cycle_multipass_data_t data
155*38fd1498Szrj     = (ix86_first_cycle_multipass_data_t) _data;
156*38fd1498Szrj   const_ix86_first_cycle_multipass_data_t prev_data
157*38fd1498Szrj     = (const_ix86_first_cycle_multipass_data_t) _prev_data;
158*38fd1498Szrj 
159*38fd1498Szrj   int insn_size = ix86_min_insn_size (insn);
160*38fd1498Szrj 
161*38fd1498Szrj   data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
162*38fd1498Szrj   data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
163*38fd1498Szrj   gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
164*38fd1498Szrj 	      && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
165*38fd1498Szrj 
166*38fd1498Szrj   /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
167*38fd1498Szrj   if (!data->ready_try_change)
168*38fd1498Szrj     {
169*38fd1498Szrj       data->ready_try_change = sbitmap_alloc (n_ready);
170*38fd1498Szrj       data->ready_try_change_size = n_ready;
171*38fd1498Szrj     }
172*38fd1498Szrj   else if (data->ready_try_change_size < n_ready)
173*38fd1498Szrj     {
174*38fd1498Szrj       data->ready_try_change = sbitmap_resize (data->ready_try_change,
175*38fd1498Szrj 					       n_ready, 0);
176*38fd1498Szrj       data->ready_try_change_size = n_ready;
177*38fd1498Szrj     }
178*38fd1498Szrj   bitmap_clear (data->ready_try_change);
179*38fd1498Szrj 
180*38fd1498Szrj   /* Filter out insns from ready_try that the core will not be able to issue
181*38fd1498Szrj      on current cycle due to decoder.  */
182*38fd1498Szrj   core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
183*38fd1498Szrj 						  false);
184*38fd1498Szrj }
185*38fd1498Szrj 
186*38fd1498Szrj /* Revert the effect on ready_try.  */
187*38fd1498Szrj static void
core2i7_first_cycle_multipass_backtrack(const void * _data,signed char * ready_try,int n_ready ATTRIBUTE_UNUSED)188*38fd1498Szrj core2i7_first_cycle_multipass_backtrack (const void *_data,
189*38fd1498Szrj 					 signed char *ready_try,
190*38fd1498Szrj 					 int n_ready ATTRIBUTE_UNUSED)
191*38fd1498Szrj {
192*38fd1498Szrj   const_ix86_first_cycle_multipass_data_t data
193*38fd1498Szrj     = (const_ix86_first_cycle_multipass_data_t) _data;
194*38fd1498Szrj   unsigned int i = 0;
195*38fd1498Szrj   sbitmap_iterator sbi;
196*38fd1498Szrj 
197*38fd1498Szrj   gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
198*38fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
199*38fd1498Szrj     {
200*38fd1498Szrj       ready_try[i] = 0;
201*38fd1498Szrj     }
202*38fd1498Szrj }
203*38fd1498Szrj 
204*38fd1498Szrj /* Save the result of multipass lookahead scheduling for the next round.  */
205*38fd1498Szrj static void
core2i7_first_cycle_multipass_end(const void * _data)206*38fd1498Szrj core2i7_first_cycle_multipass_end (const void *_data)
207*38fd1498Szrj {
208*38fd1498Szrj   const_ix86_first_cycle_multipass_data_t data
209*38fd1498Szrj     = (const_ix86_first_cycle_multipass_data_t) _data;
210*38fd1498Szrj   ix86_first_cycle_multipass_data_t next_data
211*38fd1498Szrj     = ix86_first_cycle_multipass_data;
212*38fd1498Szrj 
213*38fd1498Szrj   if (data != NULL)
214*38fd1498Szrj     {
215*38fd1498Szrj       next_data->ifetch_block_len = data->ifetch_block_len;
216*38fd1498Szrj       next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
217*38fd1498Szrj     }
218*38fd1498Szrj }
219*38fd1498Szrj 
220*38fd1498Szrj /* Deallocate target data.  */
221*38fd1498Szrj static void
core2i7_first_cycle_multipass_fini(void * _data)222*38fd1498Szrj core2i7_first_cycle_multipass_fini (void *_data)
223*38fd1498Szrj {
224*38fd1498Szrj   ix86_first_cycle_multipass_data_t data
225*38fd1498Szrj     = (ix86_first_cycle_multipass_data_t) _data;
226*38fd1498Szrj 
227*38fd1498Szrj   if (data->ready_try_change)
228*38fd1498Szrj     {
229*38fd1498Szrj       sbitmap_free (data->ready_try_change);
230*38fd1498Szrj       data->ready_try_change = NULL;
231*38fd1498Szrj       data->ready_try_change_size = 0;
232*38fd1498Szrj     }
233*38fd1498Szrj }
234*38fd1498Szrj 
235*38fd1498Szrj void
ix86_core2i7_init_hooks(void)236*38fd1498Szrj ix86_core2i7_init_hooks (void)
237*38fd1498Szrj {
238*38fd1498Szrj   targetm.sched.dfa_post_advance_cycle
239*38fd1498Szrj     = core2i7_dfa_post_advance_cycle;
240*38fd1498Szrj   targetm.sched.first_cycle_multipass_init
241*38fd1498Szrj     = core2i7_first_cycle_multipass_init;
242*38fd1498Szrj   targetm.sched.first_cycle_multipass_begin
243*38fd1498Szrj     = core2i7_first_cycle_multipass_begin;
244*38fd1498Szrj   targetm.sched.first_cycle_multipass_issue
245*38fd1498Szrj     = core2i7_first_cycle_multipass_issue;
246*38fd1498Szrj   targetm.sched.first_cycle_multipass_backtrack
247*38fd1498Szrj     = core2i7_first_cycle_multipass_backtrack;
248*38fd1498Szrj   targetm.sched.first_cycle_multipass_end
249*38fd1498Szrj     = core2i7_first_cycle_multipass_end;
250*38fd1498Szrj   targetm.sched.first_cycle_multipass_fini
251*38fd1498Szrj     = core2i7_first_cycle_multipass_fini;
252*38fd1498Szrj 
253*38fd1498Szrj   /* Set decoder parameters.  */
254*38fd1498Szrj   core2i7_secondary_decoder_max_insn_size = 8;
255*38fd1498Szrj   core2i7_ifetch_block_size = 16;
256*38fd1498Szrj   core2i7_ifetch_block_max_insns = 6;
257*38fd1498Szrj }
258