/* Scheduler hooks for IA-32 which implement CPU specific logic.
   Copyright (C) 1988-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "tm_p.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "recog.h"
#include "target.h"

/* Return the maximum number of instructions a cpu can issue.  */

int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_LAKEMONT:
    case PROCESSOR_BONNELL:
    case PROCESSOR_SILVERMONT:
    case PROCESSOR_KNL:
    case PROCESSOR_KNM:
    case PROCESSOR_INTEL:
    case PROCESSOR_K6:
    case PROCESSOR_BTVER2:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_NOCONA:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BTVER1:
      return 3;

    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BDVER4:
    case PROCESSOR_ZNVER1:
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_GENERIC:
      return 4;

    default:
      return 1;
    }
}

/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */
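/* For example (AT&T syntax, illustrative only):

       addl $8, %ebx          <- SET_INSN writes %ebx
       movl 4(%ebx), %eax     <- USE_INSN's address uses %ebx

   Here the load's address generation depends on the ADD, which on
   in-order Pentium-class pipelines causes an AGI stall.  */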

bool
ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
{
  int i;
  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        rtx addr = XEXP (recog_data.operand[i], 0);
        if (modified_in_p (addr, set_insn) != 0)
          {
            /* No AGI stall if SET_INSN is a push or pop and USE_INSN
               has SP based memory (unless index reg is modified in a pop).  */
            rtx set = single_set (set_insn);
            if (set
                && (push_operand (SET_DEST (set), GET_MODE (SET_DEST (set)))
                    || pop_operand (SET_SRC (set), GET_MODE (SET_SRC (set)))))
              {
                struct ix86_address parts;
                if (ix86_decompose_address (addr, &parts)
                    && parts.base == stack_pointer_rtx
                    && (parts.index == NULL_RTX
                        || MEM_P (SET_DEST (set))
                        || !modified_in_p (parts.index, set_insn)))
                  return false;
              }
            return true;
          }
        return false;
      }
  return false;
}

/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
   set by DEP_INSN and nothing else that DEP_INSN sets.  */
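/* For example (illustrative only):

       cmpl %eax, %ebx        <- DEP_INSN sets the flags
       setg %cl               <- INSN reads only the flags

   Such pairs can be given zero latency on Pentium, where the compare
   pairs with the flag consumer (see the PROCESSOR_PENTIUM case in
   ix86_adjust_cost below).  */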

static bool
ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}

/* Helper function for exact_store_load_dependency.
   Return true if ADDR is found in INSN.  */
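/* For example, given ADDR == (mem:SI (plus:SI (reg:SI bx) (const_int 4)))
   taken from a store, the walk below returns true if an rtx equal to that
   MEM appears anywhere inside INSN (hypothetical RTL, for illustration).  */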
static bool
exact_dependency_1 (rtx addr, rtx insn)
{
  enum rtx_code code;
  const char *format_ptr;
  int i, j;

  code = GET_CODE (insn);
  switch (code)
    {
    case MEM:
      if (rtx_equal_p (addr, insn))
        return true;
      break;
    case REG:
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CODE_LABEL:
    case PC:
    case CC0:
    case EXPR_LIST:
      return false;
    default:
      break;
    }

  format_ptr = GET_RTX_FORMAT (code);
  for (i = 0; i < GET_RTX_LENGTH (code); i++)
    {
      switch (*format_ptr++)
        {
        case 'e':
          if (exact_dependency_1 (addr, XEXP (insn, i)))
            return true;
          break;
        case 'E':
          for (j = 0; j < XVECLEN (insn, i); j++)
            if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
              return true;
          break;
        }
    }
  return false;
}

/* Return true if there is an exact dependency between STORE and LOAD, i.e.
   the same memory address is used in both.  */
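/* For example (illustrative only):

       movl %eax, 8(%esp)     <- STORE
       movl 8(%esp), %ebx     <- LOAD from the identical address

   is an exact store->load dependency, a candidate for store
   forwarding.  */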
static bool
exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
{
  rtx set1, set2;

  set1 = single_set (store);
  if (!set1)
    return false;
  if (!MEM_P (SET_DEST (set1)))
    return false;
  set2 = single_set (load);
  if (!set2)
    return false;
  if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
    return true;
  return false;
}


/* This function corrects the value of COST (latency) based on the relationship
   between INSN and DEP_INSN through a dependence of type DEP_TYPE, and strength
   DW.  It should return the new value.

   On x86 CPUs this is most commonly used to model the fact that values of
   registers used to compute the address of a memory operand need to be ready
   earlier than values of registers used in the actual operation.  */
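/* For example (illustrative only), in

       movl (%ecx), %eax      <- DEP_INSN loads a value
       movl (%eax), %edx      <- INSN uses it as an address

   the full load latency applies, whereas a dependence through a data
   operand can often be partly hidden by the reorder buffer; the
   per-processor cases below model this by reducing COST when no AGI
   dependence exists.  */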

int
ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
                  unsigned int)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (dep_type != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_LAKEMONT:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
        {
          rtx addr = PATTERN (insn);

          if (GET_CODE (addr) == PARALLEL)
            addr = XVECEXP (addr, 0, 0);

          gcc_assert (GET_CODE (addr) == SET);

          addr = SET_SRC (addr);
          if (modified_in_p (addr, dep_insn))
            cost += 1;
        }
      else if (ix86_agi_dependent (dep_insn, insn))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require the value to be ready one cycle
         earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependent (dep_insn, insn))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && MEM_P (SET_DEST (set2)))
        cost += 1;

      memory = get_attr_memory (insn);

      /* Model the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         when the previous instruction is not needed to compute the
         address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 1)
            cost--;
        }
      break;

    case PROCESSOR_K6:
      /* The esp dependency is resolved before
         the instruction is really finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      memory = get_attr_memory (insn);

      /* Model the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         when the previous instruction is not needed to compute the
         address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 2)
            cost -= 2;
          else
            cost = 1;
        }
      break;

    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BDVER4:
    case PROCESSOR_BTVER1:
    case PROCESSOR_BTVER2:
      /* The stack engine allows push and pop instructions to execute
         in parallel.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 0;
      /* FALLTHRU */

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      memory = get_attr_memory (insn);

      /* Model the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         when the previous instruction is not needed to compute the
         address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost = 3;

          /* Because of the difference between the length of integer and
             floating unit pipeline preparation stages, the memory operands
             for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 3;
          else
            loadcost = TARGET_ATHLON ? 2 : 0;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_ZNVER1:
      /* The stack engine allows push and pop instructions to execute
         in parallel.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 0;

      memory = get_attr_memory (insn);

      /* Model the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         when the previous instruction is not needed to compute the
         address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost;

          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 4;
          else
            loadcost = 7;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_GENERIC:
      /* The stack engine allows push and pop instructions to execute
         in parallel.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 0;

      memory = get_attr_memory (insn);

      /* Model the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         when the previous instruction is not needed to compute the
         address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          if (cost >= 4)
            cost -= 4;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_SILVERMONT:
    case PROCESSOR_KNL:
    case PROCESSOR_KNM:
    case PROCESSOR_INTEL:
      if (!reload_completed)
        return cost;

      /* Increase cost of integer loads.  */
      memory = get_attr_memory (dep_insn);
      if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
        {
          enum attr_unit unit = get_attr_unit (dep_insn);
          if (unit == UNIT_INTEGER && cost == 1)
            {
              if (memory == MEMORY_LOAD)
                cost = 3;
              else
                {
                  /* Increase cost of ld/st for short int types only
                     because of store forwarding issue.  */
                  rtx set = single_set (dep_insn);
                  if (set && (GET_MODE (SET_DEST (set)) == QImode
                              || GET_MODE (SET_DEST (set)) == HImode))
                    {
                      /* Increase cost of the store/load insn if an exact
                         dependence exists and INSN is a load.  */
                      enum attr_memory insn_memory = get_attr_memory (insn);
                      if (insn_memory == MEMORY_LOAD
                          && exact_store_load_dependency (dep_insn, insn))
                        cost = 3;
                    }
                }
            }
        }
      break;

    default:
      break;
    }

  return cost;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

int
ia32_multipass_dfa_lookahead (void)
{
  /* Generally, we want haifa-sched:max_issue() to look ahead as far as
     the number of instructions that can be executed in a cycle, i.e.,
     issue_rate.  */
  if (reload_completed)
    return ix86_issue_rate ();
  /* Don't use lookahead for the pre-reload schedule to save compile time.  */
  return 0;
}

/* Return true if the target platform supports macro-fusion.  */

bool
ix86_macro_fusion_p ()
{
  return TARGET_FUSE_CMP_AND_BRANCH;
}

/* Check whether the current microarchitecture supports macro fusion
   for the insn pair "CONDGEN + CONDJMP".  Refer to the
   "Intel Architectures Optimization Reference Manual".  */
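/* For example (illustrative only), on microarchitectures with
   TARGET_FUSE_CMP_AND_BRANCH the pair

       cmpl $0, %eax          <- CONDGEN
       je   .Llabel           <- CONDJMP

   decodes into a single fused micro-op, so the scheduler benefits from
   keeping the two insns adjacent.  */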

bool
ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
{
  rtx src, dest;
  enum rtx_code ccode;
  rtx compare_set = NULL_RTX, test_if, cond;
  rtx alu_set = NULL_RTX, addr = NULL_RTX;

  if (!any_condjump_p (condjmp))
    return false;

  unsigned int condreg1, condreg2;
  rtx cc_reg_1;
  targetm.fixed_condition_code_regs (&condreg1, &condreg2);
  cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
  if (!reg_referenced_p (cc_reg_1, PATTERN (condjmp))
      || !condgen
      || !modified_in_p (cc_reg_1, condgen))
    return false;

  if (get_attr_type (condgen) != TYPE_TEST
      && get_attr_type (condgen) != TYPE_ICMP
      && get_attr_type (condgen) != TYPE_INCDEC
      && get_attr_type (condgen) != TYPE_ALU)
    return false;

  compare_set = single_set (condgen);
  if (compare_set == NULL_RTX
      && !TARGET_FUSE_ALU_AND_BRANCH)
    return false;

  if (compare_set == NULL_RTX)
    {
      int i;
      rtx pat = PATTERN (condgen);
      for (i = 0; i < XVECLEN (pat, 0); i++)
        if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
          {
            rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
            if (GET_CODE (set_src) == COMPARE)
              compare_set = XVECEXP (pat, 0, i);
            else
              alu_set = XVECEXP (pat, 0, i);
          }
    }
  if (compare_set == NULL_RTX)
    return false;
  src = SET_SRC (compare_set);
  if (GET_CODE (src) != COMPARE)
    return false;

  /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
     supported.  */
  if ((MEM_P (XEXP (src, 0))
       && CONST_INT_P (XEXP (src, 1)))
      || (MEM_P (XEXP (src, 1))
          && CONST_INT_P (XEXP (src, 0))))
    return false;

  /* No fusion for RIP-relative address.  */
  if (MEM_P (XEXP (src, 0)))
    addr = XEXP (XEXP (src, 0), 0);
  else if (MEM_P (XEXP (src, 1)))
    addr = XEXP (XEXP (src, 1), 0);

  if (addr)
    {
      ix86_address parts;
      int ok = ix86_decompose_address (addr, &parts);
      gcc_assert (ok);

      if (ix86_rip_relative_addr_p (&parts))
        return false;
    }

  test_if = SET_SRC (pc_set (condjmp));
  cond = XEXP (test_if, 0);
  ccode = GET_CODE (cond);
  /* Check whether the conditional jump uses the Sign or Overflow flags.  */
  if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
      && (ccode == GE
          || ccode == GT
          || ccode == LE
          || ccode == LT))
    return false;

  /* Return true for TYPE_TEST and TYPE_ICMP.  */
  if (get_attr_type (condgen) == TYPE_TEST
      || get_attr_type (condgen) == TYPE_ICMP)
    return true;

  /* What follows handles macro-fusion for ALU + jmp.  */
  if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
    return false;

  /* No fusion for ALU ops with a memory destination operand.  */
  dest = SET_DEST (alu_set);
  if (MEM_P (dest))
    return false;

  /* Macro-fusion for inc/dec + unsigned conditional jump is not
     supported.  */
  if (get_attr_type (condgen) == TYPE_INCDEC
      && (ccode == GEU
          || ccode == GTU
          || ccode == LEU
          || ccode == LTU))
    return false;

  return true;
}