xref: /llvm-project/llvm/lib/Target/X86/X86SchedSandyBridge.td (revision 37aebcf4e60e5c913e3d99675548b3e2c631398b)
//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Sandy Bridge to support instruction
// scheduling and other instruction cost heuristics.
//
// Note that we define some instructions here that are not supported by SNB,
// but we still have to define them because SNB is the default subtarget for
// X86. These instructions are tagged with a comment `Unsupported = 1`.
//
//===----------------------------------------------------------------------===//
17
// Top-level machine model record for Sandy Bridge. Every resource and write
// definition in this file is attached to it through the
// `let SchedModel = SandyBridgeModel in` scope that follows.
def SandyBridgeModel : SchedMachineModel {
  // All x86 instructions are modeled as a single micro-op, and SB can decode 4
  // instructions per cycle.
  // FIXME: Identify instructions that aren't a single fused micro-op.
  let IssueWidth = 4;
  let MicroOpBufferSize = 168; // Based on the reorder buffer.
  // Also read back later in this file (SandyBridgeModel.LoadLatency) to derive
  // the latency of WriteIMulHLd.
  let LoadLatency = 5;
  let MispredictPenalty = 16;

  // Based on the LSD (loop-stream detector) queue size.
  let LoopMicroOpBufferSize = 28;

  // This flag is set to allow the scheduler to assign
  // a default model to unrecognized opcodes.
  let CompleteModel = 0;
}
34
35let SchedModel = SandyBridgeModel in {
36
// Sandy Bridge can issue micro-ops to 6 different ports in one cycle.
// The six ports are described by five resources: ports 2 and 3 share one
// resource that has two units.

// Ports 0, 1, and 5 handle all computation.
def SBPort0 : ProcResource<1>;
def SBPort1 : ProcResource<1>;
def SBPort5 : ProcResource<1>;

// Ports 2 and 3 are identical. They handle loads and the address half of
// stores.
def SBPort23 : ProcResource<2>;

// Port 4 gets the data half of stores. Store data can be available later than
// the store address, but since we don't model the latency of stores, we can
// ignore that.
def SBPort4 : ProcResource<1>;
52
// Many micro-ops are capable of issuing on multiple ports.
// Each group below names the set of computation ports a micro-op may use.
def SBPort01  : ProcResGroup<[SBPort0, SBPort1]>;
def SBPort05  : ProcResGroup<[SBPort0, SBPort5]>;
def SBPort15  : ProcResGroup<[SBPort1, SBPort5]>;
def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;

// 54 Entry Unified Scheduler
// The group spans every port resource and carries the scheduler's buffer size.
def SBPortAny : ProcResGroup<[SBPort0, SBPort1, SBPort23, SBPort4, SBPort5]> {
  let BufferSize=54;
}
63
// The dividers are separate single-unit resources. The division and sqrt
// writes later in this file list them next to SBPort0 so that the divider can
// be held for more cycles than the port itself.

// Integer division issued on port 0.
def SBDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def SBFPDivider : ProcResource<1>;
68
// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;

// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
// until 5/6/7 cycles after the memory operand.
def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 6>;
def : ReadAdvance<ReadAfterVecYLd, 7>;

// No read advance is modeled for ReadInt2Fpu.
def : ReadAdvance<ReadInt2Fpu, 0>;
80
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
//
// Parameters:
//   SchedRW  - the foldable write; SchedRW.Folded names its load variant.
//   ExePorts - resources used by the register variant.
//   Lat      - latency of the register variant.
//   Res      - ReleaseAtCycles, one entry per element of ExePorts
//              (default [1]).
//   UOps     - micro-op count of the register variant (default 1).
//   LoadLat  - cycles added to Lat for the folded-load variant (default 5).
//   LoadUOps - micro-ops added to UOps for the folded-load variant
//              (default 1).
multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts,
                          int Lat, list<int> Res = [1], int UOps = 1,
                          int LoadLat = 5, int LoadUOps = 1> {
  // Register variant is using a single cycle on ExePort.
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ReleaseAtCycles = Res;
    let NumMicroOps = UOps;
  }

  // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
  // the latency (default = 5).
  // SBPort23 and its single cycle are prepended, so the resource list and
  // ReleaseAtCycles stay the same length.
  def : WriteRes<SchedRW.Folded, !listconcat([SBPort23], ExePorts)> {
    let Latency = !add(Lat, LoadLat);
    let ReleaseAtCycles = !listconcat([1], Res);
    let NumMicroOps = !add(UOps, LoadUOps);
  }
}
105
// A folded store needs a cycle on port 4 for the store data, and an extra port
// 2/3 cycle to recompute the address.
def : WriteRes<WriteRMW, [SBPort23,SBPort4]>;

// Stores use ports 2/3 together with port 4; loads use ports 2/3 only.
// WriteLoad is the only entry here that sets a latency explicitly.
def : WriteRes<WriteStore,   [SBPort23, SBPort4]>;
def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>;
def : WriteRes<WriteLoad,    [SBPort23]> { let Latency = 5; }
def : WriteRes<WriteMove,    [SBPort015]>;

// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def : WriteRes<WriteZero,    []>;

// Model the effect of clobbering the read-write mask operand of the GATHER operation.
// Does not cost anything by itself, only has latency, matching that of the WriteLoad.
defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
125
// Arithmetic.
// SBWriteResPair arguments, per its signature in this file: write, ports,
// latency, ReleaseAtCycles (one entry per port), micro-ops.
// NOTE(review): X86WriteRes is defined outside this file; its arguments
// appear to follow the same order without the load parameters - confirm.
defm : SBWriteResPair<WriteALU,    [SBPort015], 1>;
defm : SBWriteResPair<WriteADC,    [SBPort05,SBPort015], 2, [1,1], 2>;

defm : SBWriteResPair<WriteIMul8,     [SBPort1],   3>;
defm : SBWriteResPair<WriteIMul16,    [SBPort1,SBPort05,SBPort015], 4, [1,1,2], 4>;
defm : X86WriteRes<WriteIMul16Imm,    [SBPort1,SBPort015], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd,  [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
defm : SBWriteResPair<WriteIMul16Reg, [SBPort1],   3>;
defm : SBWriteResPair<WriteIMul32,    [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
defm : SBWriteResPair<WriteMULX32,    [SBPort1,SBPort05,SBPort015], 3, [1,1,1], 3>;
defm : SBWriteResPair<WriteIMul32Imm, [SBPort1],   3>;
defm : SBWriteResPair<WriteIMul32Reg, [SBPort1],   3>;
defm : SBWriteResPair<WriteIMul64,    [SBPort1,SBPort0], 4, [1,1], 2>;
defm : SBWriteResPair<WriteMULX64,    [SBPort1,SBPort0], 3, [1,1], 2>;
defm : SBWriteResPair<WriteIMul64Imm, [SBPort1],   3>;
defm : SBWriteResPair<WriteIMul64Reg, [SBPort1],   3>;
// WriteIMulH consumes no resources and only contributes a latency. The record
// is named so that the folded-load variant below can read that latency back
// and add the model's LoadLatency to it.
def SBWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
def  : WriteRes<WriteIMulHLd, []> {
  let Latency = !add(SBWriteIMulH.Latency, SandyBridgeModel.LoadLatency);
}
147
// Exchange, byte-swap and compare-exchange writes.
// NOTE(review): X86WriteRes is defined outside this file; arguments appear to
// be (write, ports, latency, ReleaseAtCycles per port, micro-ops) - confirm.
defm : X86WriteRes<WriteXCHG,      [SBPort015], 2, [3], 3>;
defm : X86WriteRes<WriteBSWAP32,   [SBPort1], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64,   [SBPort1, SBPort05], 2, [1,1], 2>;
defm : X86WriteRes<WriteCMPXCHG,   [SBPort05, SBPort015], 5, [1,3], 4>;
defm : X86WriteRes<WriteCMPXCHGRMW,[SBPort015, SBPort5, SBPort23, SBPort4], 8, [1, 2, 2, 1], 6>;
153
// Integer division. All widths, signed and unsigned, share one entry:
// latency 25, one cycle on SBPort0 and ten cycles on SBDivider.
defm : SBWriteResPair<WriteDiv8,   [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv16,  [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv32,  [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv64,  [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv8,  [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
162
// SHLD/SHRD.
// The memory forms (mri/mrcl) additionally list SBPort4 and SBPort23.
defm : X86WriteRes<WriteSHDrri, [SBPort05, SBPort015], 2, [1, 1], 2>;
defm : X86WriteRes<WriteSHDrrcl,[SBPort05, SBPort015], 4, [3, 1], 4>;
defm : X86WriteRes<WriteSHDmri, [SBPort4,SBPort23,SBPort05,SBPort015], 8, [1, 2, 1, 1], 5>;
defm : X86WriteRes<WriteSHDmrcl,[SBPort4,SBPort23,SBPort05,SBPort015], 10, [1, 2, 3, 1], 7>;

// Shifts and rotates all issue on the port 0/5 group.
defm : SBWriteResPair<WriteShift,    [SBPort05],  1>;
defm : SBWriteResPair<WriteShiftCL,  [SBPort05],  3, [3], 3>;
defm : SBWriteResPair<WriteRotate,   [SBPort05],  2, [2], 2>;
defm : SBWriteResPair<WriteRotateCL, [SBPort05],  3, [3], 3>;
173
// Jumps use port 5; CRC32 uses port 1 with a folded-load latency of 5.
defm : SBWriteResPair<WriteJump,  [SBPort5],   1>;
defm : SBWriteResPair<WriteCRC32, [SBPort1],   3, [1], 1, 5>;

defm : SBWriteResPair<WriteCMOV,  [SBPort05,SBPort015], 2, [1,1], 2>; // Conditional move.
defm : X86WriteRes<WriteFCMOV, [SBPort5,SBPort05], 3, [2,1], 3>; // x87 conditional move.
def  : WriteRes<WriteSETCC, [SBPort05]>; // Setcc.
// The store form of setcc adds the store-data and address ports.
def  : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
  let Latency = 2;
  let NumMicroOps = 3;
}
184
// LAHF/SAHF and bit-test writes. Register forms use one cycle on the port 0/5
// group; the load forms add SBPort23.
// The WriteBitTestRegLd entry is intentionally left commented out, as in the
// original.
defm : X86WriteRes<WriteLAHFSAHF,        [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteBitTest,         [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestImmLd,    [SBPort05,SBPort23], 6, [1,1], 2>;
//defm : X86WriteRes<WriteBitTestRegLd,    [SBPort05,SBPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteBitTestSet,      [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestSetImmLd, [SBPort05,SBPort23], 6, [1,1], 3>;
defm : X86WriteRes<WriteBitTestSetRegLd, [SBPort05,SBPort23,SBPort5,SBPort015], 8, [1,1,1,1], 5>;
192
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
// the port to read all inputs. We don't model that.
// Consequently every LEA is described as issuing on the port 0/1 group.
def : WriteRes<WriteLEA, [SBPort01]>;
197
// Bit counts.
// All issue on port 1 with latency 3. The last argument is the folded-load
// latency (SBWriteResPair's LoadLat), which is 6 for WritePOPCNT and 5 for
// the others.
defm : SBWriteResPair<WriteBSF, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteBSR, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteLZCNT,          [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteTZCNT,          [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WritePOPCNT,         [SBPort1], 3, [1], 1, 6>;

// BMI1 BEXTR/BLS, BMI2 BZHI
// NOTE: These don't exist on Sandy Bridge. Ports are guesses.
defm : SBWriteResPair<WriteBEXTR, [SBPort05,SBPort1], 2, [1,1], 2>;
defm : SBWriteResPair<WriteBLS,   [SBPort015], 1>;
defm : SBWriteResPair<WriteBZHI,  [SBPort1], 1>;
210
// Scalar and vector floating point.
// NOTE(review): X86WriteRes is defined outside this file; arguments appear to
// be (write, ports, latency, ReleaseAtCycles per port, micro-ops) - confirm.
defm : X86WriteRes<WriteFLD0,          [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1,          [SBPort0,SBPort5], 1, [1,1], 2>;
defm : X86WriteRes<WriteFLDC,          [SBPort0,SBPort1], 1, [1,1], 2>;
// Loads: latency 5/6/7 for the plain/X/Y writes, matching the
// ReadAfterVec*Ld read advances declared earlier in this file.
defm : X86WriteRes<WriteFLoad,         [SBPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFLoadX,        [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFLoadY,        [SBPort23], 7, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad,   [SBPort23,SBPort05], 8, [1,2], 3>;
defm : X86WriteRes<WriteFMaskedLoadY,  [SBPort23,SBPort05], 9, [1,2], 3>;
// Stores: every variant uses ports 2/3 and port 4 and counts as one micro-op.
defm : X86WriteRes<WriteFStore,        [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreX,       [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreY,       [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNT,      [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX,     [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTY,     [SBPort23,SBPort4], 1, [1,1], 1>;

// Masked stores share one entry across element width and vector size.
defm : X86WriteRes<WriteFMaskedStore32,  [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteFMaskedStore64,  [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;

// Register moves use port 5.
defm : X86WriteRes<WriteFMove,         [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX,        [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY,        [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveZ,        [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS,          [SBPort015], 31, [31], 31>;
237
// FP add, compare and x87/comis-style compare writes: all one micro-op on
// port 1 with latency 3. The last argument, where present, is the folded-load
// latency (SBWriteResPair's LoadLat): 6 or 7 instead of the default 5.
defm : SBWriteResPair<WriteFAdd,    [SBPort1],  3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAddX,   [SBPort1],  3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAddY,   [SBPort1],  3, [1], 1, 7>;
defm : SBWriteResPair<WriteFAddZ,   [SBPort1],  3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFAdd64,  [SBPort1],  3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAdd64X, [SBPort1],  3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAdd64Y, [SBPort1],  3, [1], 1, 7>;
defm : SBWriteResPair<WriteFAdd64Z, [SBPort1],  3, [1], 1, 7>; // Unsupported = 1

defm : SBWriteResPair<WriteFCmp,    [SBPort1],  3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmpX,   [SBPort1],  3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmpY,   [SBPort1],  3, [1], 1, 7>;
defm : SBWriteResPair<WriteFCmpZ,   [SBPort1],  3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFCmp64,  [SBPort1],  3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmp64X, [SBPort1],  3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmp64Y, [SBPort1],  3, [1], 1, 7>;
defm : SBWriteResPair<WriteFCmp64Z, [SBPort1],  3, [1], 1, 7>; // Unsupported = 1

// These two rely on the default folded-load latency.
defm : SBWriteResPair<WriteFCom,    [SBPort1],  3>;
defm : SBWriteResPair<WriteFComX,   [SBPort1],  3>;
258
// FP multiply: one micro-op on port 0 with latency 5. The last argument is
// the folded-load latency (6 or 7).
defm : SBWriteResPair<WriteFMul,    [SBPort0],  5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMulX,   [SBPort0],  5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMulY,   [SBPort0],  5, [1], 1, 7>;
defm : SBWriteResPair<WriteFMulZ,   [SBPort0],  5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFMul64,  [SBPort0],  5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMul64X, [SBPort0],  5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMul64Y, [SBPort0],  5, [1], 1, 7>;
defm : SBWriteResPair<WriteFMul64Z, [SBPort0],  5, [1], 1, 7>; // Unsupported = 1
267
// FP divide. SBFPDivider is held for (nearly) the whole latency, as given by
// the last ReleaseAtCycles entry. The Y/Z entries add the port 0/5 group, use
// three micro-ops and roughly double the latency of the scalar/X entries.
defm : SBWriteResPair<WriteFDiv,    [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFDivX,   [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFDivY,   [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
defm : SBWriteResPair<WriteFDivZ,   [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFDiv64,  [SBPort0,SBFPDivider], 22, [1,22], 1, 6>;
defm : SBWriteResPair<WriteFDiv64X, [SBPort0,SBFPDivider], 22, [1,22], 1, 6>;
defm : SBWriteResPair<WriteFDiv64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
defm : SBWriteResPair<WriteFDiv64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>; // Unsupported = 1
276
// Reciprocal and reciprocal-sqrt writes. Scalar/X entries are one micro-op on
// port 0; the Y/Z entries add the port 0/5 group and use three micro-ops.
// Unlike the divide and sqrt writes, none of these use SBFPDivider.
defm : SBWriteResPair<WriteFRcp,   [SBPort0],  5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRcpX,  [SBPort0],  5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRcpY,  [SBPort0,SBPort05],  7, [2,1], 3, 7>;
defm : SBWriteResPair<WriteFRcpZ,  [SBPort0,SBPort05],  7, [2,1], 3, 7>; // Unsupported = 1

defm : SBWriteResPair<WriteFRsqrt, [SBPort0],  5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRsqrtX,[SBPort0],  5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRsqrtY,[SBPort0,SBPort05],  7, [2,1], 3, 7>;
defm : SBWriteResPair<WriteFRsqrtZ,[SBPort0,SBPort05],  7, [2,1], 3, 7>; // Unsupported = 1
286
// FP square root. Same shape as the FP divide entries: SBFPDivider is held for
// (nearly) the whole latency, and the Y/Z entries add the port 0/5 group with
// three micro-ops.
defm : SBWriteResPair<WriteFSqrt,    [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFSqrtX,   [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFSqrtY,   [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
defm : SBWriteResPair<WriteFSqrtZ,   [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFSqrt64,  [SBPort0,SBFPDivider], 21, [1,21], 1, 6>;
defm : SBWriteResPair<WriteFSqrt64X, [SBPort0,SBFPDivider], 21, [1,21], 1, 6>;
defm : SBWriteResPair<WriteFSqrt64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
defm : SBWriteResPair<WriteFSqrt64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFSqrt80,  [SBPort0,SBFPDivider], 24, [1,24], 1, 6>;
296
// Remaining FP writes: dot product, sign, round, logic, test, shuffle, blend.
// The WriteDPPS register and load forms are listed separately because their
// load forms do not follow the SBWriteResPair pattern (they add a cycle on
// SBPort5 as well as SBPort23).
defm : SBWriteResPair<WriteDPPD,     [SBPort0,SBPort1,SBPort5],  9, [1,1,1], 3, 6>;
defm : X86WriteRes<WriteDPPS,        [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4>;
defm : X86WriteRes<WriteDPPSY,       [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4>;
defm : X86WriteRes<WriteDPPSLd,      [SBPort0,SBPort1,SBPort5,SBPort23], 18, [1,2,2,1], 6>;
defm : X86WriteRes<WriteDPPSYLd,     [SBPort0,SBPort1,SBPort5,SBPort23], 19, [1,2,2,1], 6>;
defm : SBWriteResPair<WriteFSign,    [SBPort5], 1>;
defm : SBWriteResPair<WriteFRnd,     [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFRndY,    [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFRndZ,    [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFLogic,   [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFLogicY,  [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFLogicZ,  [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFTest,    [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFTestY,   [SBPort0], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFTestZ,   [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFShuffleY,[SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFShuffleZ,[SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarShuffleZ,[SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFBlend,    [SBPort05], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFBlendY,   [SBPort05], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFBlendZ,   [SBPort05], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
defm : SBWriteResPair<WriteFVarBlendZ,[SBPort05], 2, [2], 2, 7>; // Unsupported = 1
324
// Conversion between integer and float.
// Entries written with X86WriteRes spell out the register and load (*Ld)
// forms separately; SBWriteResPair entries derive the load form from the
// register form.
defm : SBWriteResPair<WriteCvtSS2I,   [SBPort0,SBPort1], 5, [1,1], 2>;
defm : SBWriteResPair<WriteCvtPS2I,           [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteCvtPS2IY,          [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteCvtPS2IZ,          [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtSD2I,   [SBPort0,SBPort1], 5, [1,1], 2>;
defm : SBWriteResPair<WriteCvtPD2I,   [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : X86WriteRes<WriteCvtPD2IY,     [SBPort1,SBPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtPD2IZ,     [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPD2IYLd,   [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPD2IZLd,   [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>; // Unsupported = 1

defm : X86WriteRes<WriteCvtI2SS,      [SBPort1,SBPort5],  5, [1,2], 3>;
defm : X86WriteRes<WriteCvtI2SSLd,    [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
defm : SBWriteResPair<WriteCvtI2PS,           [SBPort1],  3, [1], 1, 6>;
defm : SBWriteResPair<WriteCvtI2PSY,          [SBPort1],  3, [1], 1, 7>;
defm : SBWriteResPair<WriteCvtI2PSZ,          [SBPort1],  3, [1], 1, 7>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SD,      [SBPort1,SBPort5],  4, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PD,      [SBPort1,SBPort5],  4, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PDY,     [SBPort1,SBPort5],  4, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PDZ,     [SBPort1,SBPort5],  4, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SDLd,   [SBPort1,SBPort23],  9, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PDLd,   [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtI2PDYLd,  [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtI2PDZLd,  [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; // Unsupported = 1
350
// Conversions between single and double precision.
// The widening (SS2SD/PS2PD) load forms are listed explicitly; the narrowing
// (SD2SS/PD2PS) entries use SBWriteResPair with load latency 6 or 7.
defm : X86WriteRes<WriteCvtSS2SD,     [SBPort0,SBPort5], 1, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PD,     [SBPort0,SBPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDY,    [SBPort0,SBPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDZ,    [SBPort0,SBPort5], 2, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtSS2SDLd,  [SBPort0,SBPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDLd,  [SBPort0,SBPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort5,SBPort23], 7, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort5,SBPort23], 7, [1,1,1], 3>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtSD2SS,  [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PS,  [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
defm : SBWriteResPair<WriteCvtPD2PSZ, [SBPort1,SBPort5], 4, [1,1], 2, 7>; // Unsupported = 1
363
// F16C Instructions (IvyBridge+)
// Half-to-single conversions have register and load (*Ld) forms;
// single-to-half conversions have register and store (*St) forms.
defm : X86WriteRes<WriteCvtPH2PS,     [SBPort0,SBPort5], 3, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY,    [SBPort0,SBPort5], 3, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ,    [SBPort0,SBPort5], 3, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPH2PSLd,  [SBPort0,SBPort23], 8, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SBPort0,SBPort5,SBPort23], 8, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [SBPort0,SBPort5,SBPort23], 8, [1,1,1], 3>; // Unsupported = 1

defm : X86WriteRes<WriteCvtPS2PH,    [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPS2PHY,   [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPS2PHZ,   [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PHSt,  [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>; // Unsupported = 1
378
// Vector integer operations.
// Loads, stores and moves. The load/store entries mirror the FP load/store
// entries of this file: load latency 5/6/7 on ports 2/3, stores on ports 2/3
// plus port 4.
defm : X86WriteRes<WriteVecLoad,         [SBPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX,        [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY,        [SBPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT,       [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY,      [SBPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad,   [SBPort23,SBPort05], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY,  [SBPort23,SBPort05], 9, [1,2], 3>;
defm : X86WriteRes<WriteVecStore,        [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreX,       [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY,       [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNT,      [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNTY,     [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore32,  [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore64,  [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
// Register moves; only the X form may also use port 1.
defm : X86WriteRes<WriteVecMove,         [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX,        [SBPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY,        [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveZ,        [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr,    [SBPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr,  [SBPort5], 1, [1], 1>;
402
// Vector integer logic, test, ALU, multiply, shuffle, blend and SAD writes.
// SBWriteResPair arguments, per its signature in this file: write, ports,
// latency, ReleaseAtCycles (one entry per port), micro-ops, folded-load
// latency (5, 6 or 7 here).
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVecLogicX,[SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecLogicZ,[SBPort015], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecTest,  [SBPort0,SBPort5], 2, [1,1], 2, 6>;
defm : SBWriteResPair<WriteVecTestY, [SBPort0,SBPort5], 2, [1,1], 2, 7>;
defm : SBWriteResPair<WriteVecTestZ, [SBPort0,SBPort5], 2, [1,1], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecALU,   [SBPort1],  3, [1], 1, 5>;
defm : SBWriteResPair<WriteVecALUX,  [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecALUY,  [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecALUZ,  [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecIMul,  [SBPort0], 5, [1], 1, 5>;
defm : SBWriteResPair<WriteVecIMulX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WriteVecIMulZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePMULLD,   [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePMULLDY,  [SBPort0], 5, [1], 1, 7>; // TODO this is probably wrong for 256/512-bit for the "generic" model
defm : SBWriteResPair<WritePMULLDZ,  [SBPort0], 5, [1], 1, 7>;  // Unsupported = 1
defm : SBWriteResPair<WriteShuffle,  [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteShuffleX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteShuffleY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteShuffleZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVarShuffle,  [SBPort15], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVarShuffleX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVarShuffleY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarShuffleZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteBlend,   [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteBlendY,  [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteBlendZ,  [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteVarBlendY,[SBPort15], 2, [2], 2, 7>;
defm : SBWriteResPair<WriteVarBlendZ,[SBPort15], 2, [2], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteMPSAD,  [SBPort0, SBPort15], 7, [1,2], 3, 6>;
defm : SBWriteResPair<WriteMPSADY, [SBPort0, SBPort15], 7, [1,2], 3, 7>;
defm : SBWriteResPair<WriteMPSADZ, [SBPort0, SBPort15], 7, [1,2], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePSADBW,  [SBPort0], 5, [1], 1, 5>;
defm : SBWriteResPair<WritePSADBWX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WritePSADBWZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePHMINPOS,  [SBPort0], 5, [1], 1, 6>;
443
// Vector integer shifts.
// The plain (non-X/Y/Z) WriteVecShift and WriteVecShiftImm entries use port 5;
// the wider forms use port 0, and the non-immediate X/Y/Z shifts add a
// second micro-op on the port 1/5 group.
defm : SBWriteResPair<WriteVecShift,     [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVecShiftX,    [SBPort0,SBPort15], 2, [1,1], 2, 6>;
defm : SBWriteResPair<WriteVecShiftY,    [SBPort0,SBPort15], 4, [1,1], 2, 7>;
defm : SBWriteResPair<WriteVecShiftZ,    [SBPort0,SBPort15], 4, [1,1], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecShiftImm,  [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVecShiftImmX, [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecShiftImmY, [SBPort0], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecShiftImmZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVarVecShift,  [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVarVecShiftY, [SBPort0], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarVecShiftZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
456
// Vector insert/extract operations.
// ReleaseAtCycles is not set on these records, so each listed resource takes
// whatever default WriteRes provides (defined outside this file).
def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> {
  let Latency = 2;
  let NumMicroOps = 2;
}
// Insert from memory: the load on ports 2/3 replaces the port 5 micro-op.
def : WriteRes<WriteVecInsertLd, [SBPort23,SBPort15]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def : WriteRes<WriteVecExtract, [SBPort0,SBPort15]> {
  let Latency = 3;
  let NumMicroOps = 2;
}
// Extract to memory: store-data and address ports plus the port 1/5 group.
def : WriteRes<WriteVecExtractSt, [SBPort4,SBPort23,SBPort15]> {
  let Latency = 5;
  let NumMicroOps = 3;
}
475
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub  instructions.
////////////////////////////////////////////////////////////////////////////////

// Each entry is three micro-ops. The FP forms use port 1 once and port 5
// twice; the integer forms use the port 1/5 group for three cycles.
defm : SBWriteResPair<WriteFHAdd,  [SBPort1,SBPort5], 5, [1,2], 3, 6>;
defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>;
defm : SBWriteResPair<WriteFHAddZ, [SBPort1,SBPort5], 5, [1,2], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePHAdd,  [SBPort15], 3, [3], 3, 5>;
defm : SBWriteResPair<WritePHAddX, [SBPort15], 3, [3], 3, 6>;
defm : SBWriteResPair<WritePHAddY, [SBPort15], 3, [3], 3, 7>;
defm : SBWriteResPair<WritePHAddZ, [SBPort15], 3, [3], 3, 7>; // Unsupported = 1
487
////////////////////////////////////////////////////////////////////////////////
// String instructions.
////////////////////////////////////////////////////////////////////////////////

// In every pair below the load (*Ld) form adds SBPort23 and 6 cycles of
// latency to the register form. The explicit-length (EStr) records do not set
// NumMicroOps and therefore use the WriteRes default (defined outside this
// file).

// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SBPort0]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [SBPort0, SBPort23]> {
  let Latency = 17;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3,1];
}

// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SBPort015]> {
  let Latency = 11;
  let ReleaseAtCycles = [8];
}
def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> {
  let Latency = 17;
  let ReleaseAtCycles = [7, 1];
}

// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SBPort0]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [SBPort0,SBPort23]> {
  let Latency = 17;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3,1];
}

// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SBPort015]> {
  let Latency = 4;
  let ReleaseAtCycles = [8];
}
def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
  let Latency = 10;
  let ReleaseAtCycles = [7, 1];
}
535
// MOVMSK Instructions.
// All issue on port 0; the MMX form has latency 1, the others latency 2.
def : WriteRes<WriteFMOVMSK,    [SBPort0]> { let Latency = 2; }
def : WriteRes<WriteVecMOVMSK,  [SBPort0]> { let Latency = 2; }
def : WriteRes<WriteVecMOVMSKY, [SBPort0]> { let Latency = 2; }
def : WriteRes<WriteMMXMOVMSK,  [SBPort0]> { let Latency = 1; }
541
// AES Instructions.
// In every pair below the load (*Ld) form adds SBPort23 and 6 cycles of
// latency to the register form.
def : WriteRes<WriteAESDecEnc, [SBPort5,SBPort015]> {
  let Latency = 7;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def : WriteRes<WriteAESDecEncLd, [SBPort5,SBPort23,SBPort015]> {
  let Latency = 13;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}

def : WriteRes<WriteAESIMC, [SBPort5]> {
  let Latency = 12;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [SBPort5,SBPort23]> {
  let Latency = 18;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}

// The key-generation records do not set NumMicroOps; they use the WriteRes
// default (defined outside this file).
def : WriteRes<WriteAESKeyGen, [SBPort015]> {
  let Latency = 8;
  let ReleaseAtCycles = [11];
}
def : WriteRes<WriteAESKeyGenLd, [SBPort015, SBPort23]> {
  let Latency = 14;
  let ReleaseAtCycles = [10, 1];
}
573
// Carry-less multiplication instructions.
// The load form moves one of the 18 resource cycles from the port 0/1/5 group
// to SBPort23 and adds 6 cycles of latency.
def : WriteRes<WriteCLMul, [SBPort015]> {
  let Latency = 14;
  let ReleaseAtCycles = [18];
}
def : WriteRes<WriteCLMulLd, [SBPort015, SBPort23]> {
  let Latency = 20;
  let ReleaseAtCycles = [17, 1];
}
583
// Load/store MXCSR.
// FIXME: This is probably wrong. Only STMXCSR should require Port4.
def : WriteRes<WriteLDMXCSR, [SBPort0,SBPort4,SBPort5,SBPort23]> { let Latency = 5; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1]; }
def : WriteRes<WriteSTMXCSR, [SBPort0,SBPort4,SBPort5,SBPort23]> { let Latency = 5; let NumMicroOps = 4; let ReleaseAtCycles = [1,1,1,1]; }

// System and microcoded instructions are given a flat latency of 100 cycles
// on the port 0/1/5 group. Fences are two micro-ops on the store ports, and
// NOPs consume no resources.
def : WriteRes<WriteSystem,     [SBPort015]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
def : WriteRes<WriteFence, [SBPort23, SBPort4]> { let NumMicroOps = 2; let ReleaseAtCycles = [1,1]; }
def : WriteRes<WriteNop, []>;
593
594// AVX2/FMA is not supported on Sandy Bridge, but we should define the basic
595// scheduling resources anyway.
// NOTE(review): SBWriteResPair is defined outside this section; the trailing
// arguments are presumably latency, release cycles, uop count and load
// latency. Confirm against the multiclass definition.
596defm : SBWriteResPair<WriteFShuffle256, [SBPort5], 1, [1], 1, 7>;
597defm : SBWriteResPair<WriteFVarShuffle256, [SBPort5], 1, [1], 1, 7>;
598defm : SBWriteResPair<WriteShuffle256, [SBPort5], 1, [1], 1, 7>;
599defm : SBWriteResPair<WriteVPMOV256, [SBPort5], 1, [1], 1, 7>;
600defm : SBWriteResPair<WriteVarShuffle256, [SBPort5], 1, [1], 1, 7>;
601defm : SBWriteResPair<WriteFMA,  [SBPort01],  5>;
602defm : SBWriteResPair<WriteFMAX, [SBPort01],  5>;
603defm : SBWriteResPair<WriteFMAY, [SBPort01],  5>;
604defm : SBWriteResPair<WriteFMAZ, [SBPort01],  5>;  // Unsupported = 1
605
606// Remaining SNB instrs.
607
// Each SBWriteResGroupN below is a per-instruction override: a SchedWriteRes
// giving latency, uop count and per-resource release cycles, followed by the
// InstRW record(s) that attach it to specific opcodes.
//
// Groups 1, 2, 4 and 5: single uop, latency 1, differing only in the port or
// port group used.

// Port 1 only: x87 register compares.
608def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
609  let Latency = 1;
610  let NumMicroOps = 1;
611  let ReleaseAtCycles = [1];
612}
613def: InstRW<[SBWriteResGroup1], (instrs COMP_FST0r,
614                                        COM_FST0r,
615                                        UCOM_FPr,
616                                        UCOM_Fr)>;
617
// Port 5 only: x87 stack management, x87 register moves, and RET64.
618def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> {
619  let Latency = 1;
620  let NumMicroOps = 1;
621  let ReleaseAtCycles = [1];
622}
623def: InstRW<[SBWriteResGroup2], (instrs FDECSTP, FINCSTP, FFREE, FFREEP, FNOP,
624                                        LD_Frr, ST_Frr, ST_FPrr)>;
625def: InstRW<[SBWriteResGroup2], (instrs RET64)>;
626
// SBPort05 group: CDQ and CQO.
627def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
628  let Latency = 1;
629  let NumMicroOps = 1;
630  let ReleaseAtCycles = [1];
631}
632def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>;
633
// SBPort15 group: register forms of selected MMX integer ops.
634def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
635  let Latency = 1;
636  let NumMicroOps = 1;
637  let ReleaseAtCycles = [1];
638}
639def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr,
640                                        MMX_PABSDrr,
641                                        MMX_PABSWrr,
642                                        MMX_PADDQrr,
643                                        MMX_PALIGNRrri,
644                                        MMX_PSIGNBrr,
645                                        MMX_PSIGNDrr,
646                                        MMX_PSIGNWrr,
647                                        MMX_PSUBQrr)>;
648
// Groups 11, 12, 15 and 18: two uops, latency 2.

// SCAS string scans: both uops go to the SBPort015 group.
649def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
650  let Latency = 2;
651  let NumMicroOps = 2;
652  let ReleaseAtCycles = [2];
653}
654def: InstRW<[SBWriteResGroup11], (instrs SCASB,
655                                         SCASL,
656                                         SCASQ,
657                                         SCASW)>;
658
// Scalar (U)COMISS/(U)COMISD register compares, SSE and VEX-encoded forms:
// one uop on port 0 and one on port 1.
659def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> {
660  let Latency = 2;
661  let NumMicroOps = 2;
662  let ReleaseAtCycles = [1,1];
663}
664def: InstRW<[SBWriteResGroup12], (instregex "(V?)(U?)COMI(SD|SS)rr")>;
665
// CWD and FNSTSW to a 16-bit register: one uop on port 0, one on SBPort015.
666def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
667  let Latency = 2;
668  let NumMicroOps = 2;
669  let ReleaseAtCycles = [1,1];
670}
671def: InstRW<[SBWriteResGroup15], (instrs CWD,
672                                         FNSTSW16r)>;
673
// J(E/R)CXZ branches and MOVDQ2Q: one uop on port 5, one on SBPort015.
674def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
675  let Latency = 2;
676  let NumMicroOps = 2;
677  let ReleaseAtCycles = [1,1];
678}
679def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ,
680                                         MMX_MOVDQ2Qrr)>;
681
// PUSHFS64: modeled as a single port-1 uop with latency 3.
682def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
683  let Latency = 3;
684  let NumMicroOps = 1;
685  let ReleaseAtCycles = [1];
686}
687def: InstRW<[SBWriteResGroup21], (instrs PUSHFS64)>;
688
// EXTRACTPS to register (SSE and VEX forms): port 0 + port 5, latency 3.
689def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
690  let Latency = 3;
691  let NumMicroOps = 2;
692  let ReleaseAtCycles = [1,1];
693}
694def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrri")>;
695
// Rotate-through-carry by 1, register forms: 3 uops (2 on SBPort05, 1 on
// SBPort015), latency 2.
696def SBWriteResGroup23 : SchedWriteRes<[SBPort05,SBPort015]> {
697  let Latency = 2;
698  let NumMicroOps = 3;
699  let ReleaseAtCycles = [2,1];
700}
701def: InstRW<[SBWriteResGroup23], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
702                                         RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
703
// RCR by immediate, register forms: 8 uops, latency 3.
704def SBWriteResGroup24 : SchedWriteRes<[SBPort1,SBPort5,SBPort05,SBPort015]> {
705  let Latency = 3;
706  let NumMicroOps = 8;
707  let ReleaseAtCycles = [1,1,4,2];
708}
709def: InstRW<[SBWriteResGroup24], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
710
// RCL by immediate, register forms: same resources as Group24 but with one
// extra cycle of latency (4).
711def SBWriteResGroup24b : SchedWriteRes<[SBPort1,SBPort5,SBPort05,SBPort015]> {
712  let Latency = 4;
713  let NumMicroOps = 8;
714  let ReleaseAtCycles = [1,1,4,2];
715}
716def: InstRW<[SBWriteResGroup24b], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
717
// LEAVE: 3 uops (1 on the load ports, 2 on SBPort015), latency 7.
718def SBWriteResGroup25_1 : SchedWriteRes<[SBPort23,SBPort015]> {
719  let Latency = 7;
720  let NumMicroOps = 3;
721  let ReleaseAtCycles = [1,2];
722}
723def: InstRW<[SBWriteResGroup25_1], (instrs LEAVE, LEAVE64)>;
724
// x87 (U)COMI register compares: one uop on each of ports 0, 1 and 5.
725def SBWriteResGroup26_2 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
726  let Latency = 3;
727  let NumMicroOps = 3;
728  let ReleaseAtCycles = [1,1,1];
729}
730def: InstRW<[SBWriteResGroup26_2], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
731
// MOV64sr: port 1 + SBPort015, latency 4.
732def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> {
733  let Latency = 4;
734  let NumMicroOps = 2;
735  let ReleaseAtCycles = [1,1];
736}
737def: InstRW<[SBWriteResGroup29], (instrs MOV64sr)>;
738
// PAUSE: 4 uops (1 on port 5, 3 on SBPort015), latency 4.
739def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
740  let Latency = 4;
741  let NumMicroOps = 4;
742  let ReleaseAtCycles = [1,3];
743}
744def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>;
745
// LOOP: 8 uops, latency 3.
746def SBWriteResGroup30 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> {
747  let Latency = 3;
748  let NumMicroOps = 8;
749  let ReleaseAtCycles = [1,3,4];
750}
751def: InstRW<[SBWriteResGroup30], (instrs LOOP)>;
752
// LOOPE/LOOPNE: 12 uops, latency 4.
753def SBWriteResGroup31 : SchedWriteRes<[SBPort1,SBPort5,SBPort015,SBPort05]> {
754  let Latency = 4;
755  let NumMicroOps = 12;
756  let ReleaseAtCycles = [1,3,6,2];
757}
758def: InstRW<[SBWriteResGroup31], (instrs LOOPE, LOOPNE)>;
759
// The groups from here through Group47 all use latency 5.

// Rotate-through-carry by CL, register forms: 8 uops, all on SBPort05.
760def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> {
761  let Latency = 5;
762  let NumMicroOps = 8;
763  let ReleaseAtCycles = [8];
764}
765def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)rCL",
766                                            "RCR(8|16|32|64)rCL")>;
767
// PUSH of a register or 8-bit immediate: store data (port 4) + SBPort23.
768def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
769  let Latency = 5;
770  let NumMicroOps = 2;
771  let ReleaseAtCycles = [1,1];
772}
773def: InstRW<[SBWriteResGroup33], (instregex "PUSH(16r|32r|64r|64i8)")>;
774
// CLI: 3 uops (1 on port 1, 2 on port 5).
775def SBWriteResGroup35 : SchedWriteRes<[SBPort1,SBPort5]> {
776  let Latency = 5;
777  let NumMicroOps = 3;
778  let ReleaseAtCycles = [1,2];
779}
780def: InstRW<[SBWriteResGroup35], (instrs CLI)>;
781
// PUSHGS64 and x87 ISTT_FP stores: port 1 + port 4 + SBPort23.
782def SBWriteResGroup35_2 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
783  let Latency = 5;
784  let NumMicroOps = 3;
785  let ReleaseAtCycles = [1,1,1];
786}
787def: InstRW<[SBWriteResGroup35_2], (instrs PUSHGS64)>;
788def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP(16|32|64)m")>;
789
// Direct/register CALLs and EXTRACTPS to memory: port 4 + port 5 + SBPort23.
790def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
791  let Latency = 5;
792  let NumMicroOps = 3;
793  let ReleaseAtCycles = [1,1,1];
794}
795def: InstRW<[SBWriteResGroup36], (instrs CALL64pcrel32)>;
796def: InstRW<[SBWriteResGroup36], (instregex "CALL(16|32|64)r",
797                                            "(V?)EXTRACTPSmri")>;
798
// STOS string stores: port 4 + SBPort23 + SBPort015.
799def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
800  let Latency = 5;
801  let NumMicroOps = 3;
802  let ReleaseAtCycles = [1,1,1];
803}
804def: InstRW<[SBWriteResGroup40], (instrs STOSB, STOSL, STOSQ, STOSW)>;
805
// FNINIT: 4 uops (1 on port 5, 3 on SBPort015).
806def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> {
807  let Latency = 5;
808  let NumMicroOps = 4;
809  let ReleaseAtCycles = [1,3];
810}
811def: InstRW<[SBWriteResGroup41], (instrs FNINIT)>;
812
// PEXTRD/PEXTRQ to memory and PUSHF: one uop each on port 0, port 4, SBPort23
// and SBPort15.
813def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> {
814  let Latency = 5;
815  let NumMicroOps = 4;
816  let ReleaseAtCycles = [1,1,1,1];
817}
818def: InstRW<[SBWriteResGroup45], (instregex "(V?)PEXTR(D|Q)mr",
819                                            "PUSHF(16|64)")>;
820
// CLFLUSH: one uop each on port 4, port 5, SBPort01 and SBPort23.
821def SBWriteResGroup46 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
822  let Latency = 5;
823  let NumMicroOps = 4;
824  let ReleaseAtCycles = [1,1,1,1];
825}
826def: InstRW<[SBWriteResGroup46], (instregex "CLFLUSH")>;
827
// FXRSTOR: same resources as Group46 with a second port-5 uop (5 uops total).
828def SBWriteResGroup47 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
829  let Latency = 5;
830  let NumMicroOps = 5;
831  let ReleaseAtCycles = [1,2,1,1];
832}
833def: InstRW<[SBWriteResGroup47], (instregex "FXRSTOR")>;
834
// Groups 48 through 53 all use latency 6.

// Pure loads: a single uop on the load ports. Covers POP to register and
// scalar/duplicating vector loads (SSE and VEX forms).
835def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> {
836  let Latency = 6;
837  let NumMicroOps = 1;
838  let ReleaseAtCycles = [1];
839}
840def: InstRW<[SBWriteResGroup48], (instrs VBROADCASTSSrm)>;
841def: InstRW<[SBWriteResGroup48], (instregex "POP(16|32|64)r",
842                                            "(V?)MOV64toPQIrm",
843                                            "(V?)MOVDDUPrm",
844                                            "(V?)MOVDI2PDIrm",
845                                            "(V?)MOVQI2PQIrm",
846                                            "(V?)MOVSDrm",
847                                            "(V?)MOVSHDUPrm",
848                                            "(V?)MOVSLDUPrm",
849                                            "(V?)MOVSSrm")>;
850
// MOV16sm: load + one port-5 uop.
851def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> {
852  let Latency = 6;
853  let NumMicroOps = 2;
854  let ReleaseAtCycles = [1,1];
855}
856def: InstRW<[SBWriteResGroup49], (instrs MOV16sm)>;
857
// Load-folded MMX PABS/PALIGNR/PSIGN: load + one SBPort15 uop. These are the
// memory counterparts of the register forms listed under SBWriteResGroup5,
// except PADDQ/PSUBQ, which use SBWriteResGroup59 (latency 7).
858def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> {
859  let Latency = 6;
860  let NumMicroOps = 2;
861  let ReleaseAtCycles = [1,1];
862}
863def: InstRW<[SBWriteResGroup51], (instrs MMX_PABSBrm,
864                                         MMX_PABSDrm,
865                                         MMX_PABSWrm,
866                                         MMX_PALIGNRrmi,
867                                         MMX_PSIGNBrm,
868                                         MMX_PSIGNDrm,
869                                         MMX_PSIGNWrm)>;
870
// LODSL/LODSQ: load + one SBPort015 uop. The byte/word forms are modeled
// separately in SBWriteResGroup63.
871def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
872  let Latency = 6;
873  let NumMicroOps = 2;
874  let ReleaseAtCycles = [1,1];
875}
876def: InstRW<[SBWriteResGroup52], (instrs LODSL, LODSQ)>;
877
// x87 stores to memory: 3 uops (1 on port 4, 2 on SBPort23).
878def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> {
879  let Latency = 6;
880  let NumMicroOps = 3;
881  let ReleaseAtCycles = [1,2];
882}
883def: InstRW<[SBWriteResGroup53], (instregex "ST_F(32|64)m",
884                                            "ST_FP(32|64|80)m")>;
885
// Groups 54 through 69 all use latency 7.

// 256-bit duplicating loads: a single uop on the load ports.
886def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
887  let Latency = 7;
888  let NumMicroOps = 1;
889  let ReleaseAtCycles = [1];
890}
891def: InstRW<[SBWriteResGroup54], (instrs VMOVDDUPYrm,
892                                         VMOVSHDUPYrm,
893                                         VMOVSLDUPYrm)>;
894
// VINSERTF128 from memory: load + one SBPort05 uop.
895def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
896  let Latency = 7;
897  let NumMicroOps = 2;
898  let ReleaseAtCycles = [1,1];
899}
900def: InstRW<[SBWriteResGroup58], (instrs VINSERTF128rmi)>;
901
// Load-folded MMX PADDQ/PSUBQ: load + one SBPort15 uop.
902def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
903  let Latency = 7;
904  let NumMicroOps = 2;
905  let ReleaseAtCycles = [1,1];
906}
907def: InstRW<[SBWriteResGroup59], (instrs MMX_PADDQrm,
908                                         MMX_PSUBQrm)>;
909
// VERR/VERW with a memory operand: 3 uops (2 on port 5, 1 load).
910def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
911  let Latency = 7;
912  let NumMicroOps = 3;
913  let ReleaseAtCycles = [2,1];
914}
915def: InstRW<[SBWriteResGroup62], (instrs VERRm, VERWm)>;
916
// LODSB/LODSW: 3 uops (1 load, 2 on SBPort015), i.e. one more SBPort015 uop
// than the LODSL/LODSQ model in SBWriteResGroup52.
917def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> {
918  let Latency = 7;
919  let NumMicroOps = 3;
920  let ReleaseAtCycles = [1,2];
921}
922def: InstRW<[SBWriteResGroup63], (instrs LODSB, LODSW)>;
923
// FARJMP64m: port 5 + SBPort01 + load.
924def SBWriteResGroup64 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
925  let Latency = 7;
926  let NumMicroOps = 3;
927  let ReleaseAtCycles = [1,1,1];
928}
929def: InstRW<[SBWriteResGroup64], (instrs FARJMP64m)>;
930
// FNSTSW to memory: 4 uops (port 0, port 4, and 2 on SBPort23).
931def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> {
932  let Latency = 7;
933  let NumMicroOps = 4;
934  let ReleaseAtCycles = [1,1,2];
935}
936def: InstRW<[SBWriteResGroup66], (instrs FNSTSWm)>;
937
// SLDT/STR to register: 4 uops (1 on port 1, 2 on port 5, 1 on SBPort015).
938def SBWriteResGroup67 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> {
939  let Latency = 7;
940  let NumMicroOps = 4;
941  let ReleaseAtCycles = [1,2,1];
942}
943def: InstRW<[SBWriteResGroup67], (instregex "SLDT(16|32|64)r",
944                                            "STR(16|32|64)r")>;
945
// FNSTCW to memory and indirect CALL through memory: 4 uops (port 4, port 5,
// and 2 on SBPort23).
946def SBWriteResGroup68 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
947  let Latency = 7;
948  let NumMicroOps = 4;
949  let ReleaseAtCycles = [1,1,2];
950}
951def: InstRW<[SBWriteResGroup68], (instrs FNSTCW16m)>;
952def: InstRW<[SBWriteResGroup68], (instregex "CALL(16|32|64)m")>;
953
// Memory-destination SAR/SHL/SHR by 1 or by immediate: 4 uops (port 4, 2 on
// SBPort23, 1 on SBPort05).
954def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
955  let Latency = 7;
956  let NumMicroOps = 4;
957  let ReleaseAtCycles = [1,2,1];
958}
959def: InstRW<[SBWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
960                                            "SHL(8|16|32|64)m(1|i)",
961                                            "SHR(8|16|32|64)m(1|i)")>;
962
// Load-folded (U)COMISS/(U)COMISD: the register model (SBWriteResGroup12) plus
// one load uop; latency 8.
963def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
964  let Latency = 8;
965  let NumMicroOps = 3;
966  let ReleaseAtCycles = [1,1,1];
967}
968def: InstRW<[SBWriteResGroup77], (instregex "(V?)(U?)COMI(SD|SS)rm")>;
969
// CMPXCHG8B/CMPXCHG16B: latency 6.
// NOTE(review): NumMicroOps (3) is lower than the sum of ReleaseAtCycles (4);
// confirm this is intended.
970def SBWriteResGroup81 : SchedWriteRes<[SBPort4, SBPort23, SBPort015]> {
971  let Latency = 6;
972  let NumMicroOps = 3;
973  let ReleaseAtCycles = [1, 2, 1];
974}
975def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16)B")>;
976
// Groups 83 through 87: 5 uops, latency 8.

// CMPS string compares: 2 uops on SBPort23, 3 on SBPort015.
977def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> {
978  let Latency = 8;
979  let NumMicroOps = 5;
980  let ReleaseAtCycles = [2,3];
981}
982def: InstRW<[SBWriteResGroup83], (instrs CMPSB,
983                                         CMPSL,
984                                         CMPSQ,
985                                         CMPSW)>;
986
// FLDCW from memory: 1 uop on port 4, 2 on port 5, 2 on SBPort23.
987def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
988  let Latency = 8;
989  let NumMicroOps = 5;
990  let ReleaseAtCycles = [1,2,2];
991}
992def: InstRW<[SBWriteResGroup84], (instrs FLDCW16m)>;
993
// Memory-destination ROL/ROR by 1 or by immediate: 1 uop on port 4, 2 on
// SBPort23, 2 on SBPort05.
994def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
995  let Latency = 8;
996  let NumMicroOps = 5;
997  let ReleaseAtCycles = [1,2,2];
998}
999def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m(1|i)",
1000                                            "ROR(8|16|32|64)m(1|i)")>;
1001
// MOVS string moves and XADD with a memory operand: 1 uop on port 4, 2 on
// SBPort23, 2 on SBPort015.
1002def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
1003  let Latency = 8;
1004  let NumMicroOps = 5;
1005  let ReleaseAtCycles = [1,2,2];
1006}
1007def: InstRW<[SBWriteResGroup86], (instrs MOVSB, MOVSL, MOVSQ, MOVSW)>;
1008def: InstRW<[SBWriteResGroup86], (instregex "XADD(8|16|32|64)rm")>;
1009
// FARCALL64m: port 4, port 5, SBPort01, and 2 uops on SBPort23.
1010def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
1011  let Latency = 8;
1012  let NumMicroOps = 5;
1013  let ReleaseAtCycles = [1,1,1,2];
1014}
1015def: InstRW<[SBWriteResGroup87], (instrs FARCALL64m)>;
1016
// Groups 95 through 100 all use latency 9.

// x87 loads from memory: port 5 + SBPort01 + load.
1017def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
1018  let Latency = 9;
1019  let NumMicroOps = 3;
1020  let ReleaseAtCycles = [1,1,1];
1021}
1022def: InstRW<[SBWriteResGroup95], (instregex "LD_F(32|64|80)m")>;
1023
// x87 integer stores: 4 uops (port 1, port 4, and 2 on SBPort23).
1024def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
1025  let Latency = 9;
1026  let NumMicroOps = 4;
1027  let ReleaseAtCycles = [1,1,2];
1028}
1029def: InstRW<[SBWriteResGroup97], (instregex "IST_F(16|32)m",
1030                                            "IST_FP(16|32|64)m")>;
1031
// Memory-destination shifts and rotates by CL: 6 uops (port 4, 2 on SBPort23,
// 3 on SBPort05).
1032def SBWriteResGroup97_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
1033  let Latency = 9;
1034  let NumMicroOps = 6;
1035  let ReleaseAtCycles = [1,2,3];
1036}
1037def: InstRW<[SBWriteResGroup97_2], (instregex "ROL(8|16|32|64)mCL",
1038                                              "ROR(8|16|32|64)mCL",
1039                                              "SAR(8|16|32|64)mCL",
1040                                              "SHL(8|16|32|64)mCL",
1041                                              "SHR(8|16|32|64)mCL")>;
1042
// Model aliased to WriteADCRMW (read-modify-write add/subtract with carry).
// NOTE(review): NumMicroOps (4) is lower than the sum of ReleaseAtCycles (6);
// confirm this is intended.
1043def SBWriteResGroup98 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
1044  let Latency = 9;
1045  let NumMicroOps = 4;
1046  let ReleaseAtCycles = [1,2,3];
1047}
1048def: SchedAlias<WriteADCRMW, SBWriteResGroup98>;
1049
// Explicit override for the ADC/SBB memory-destination, register-source forms.
// NOTE(review): NumMicroOps (4) is lower than the sum of ReleaseAtCycles (6);
// confirm this is intended.
1050def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
1051  let Latency = 9;
1052  let NumMicroOps = 4;
1053  let ReleaseAtCycles = [1,2,2,1];
1054}
1055def: InstRW<[SBWriteResGroup99, ReadAfterLd], (instrs ADC8mr, ADC16mr, ADC32mr, ADC64mr,
1056                                                      SBB8mr, SBB16mr, SBB32mr, SBB64mr)>;
1057
// Model aliased to WriteBitTestRegLd: 6 uops spread over port 4, port 5,
// SBPort23 (2 cycles), SBPort05 and SBPort015.
1058def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort05,SBPort015]> {
1059  let Latency = 9;
1060  let NumMicroOps = 6;
1061  let ReleaseAtCycles = [1,1,2,1,1];
1062}
1063def : SchedAlias<WriteBitTestRegLd, SBWriteResGroup100>; // TODO - this is incorrect - no RMW
1064
// Load-folded x87 arithmetic and the remaining load-folded compares. Every
// group in this run pairs compute-port uop(s) with one load uop on SBPort23.

// x87 add/sub with an FP memory operand, and x87 integer loads: port 1 + load,
// latency 10.
1065def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
1066  let Latency = 10;
1067  let NumMicroOps = 2;
1068  let ReleaseAtCycles = [1,1];
1069}
1070def: InstRW<[SBWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
1071                                             "ILD_F(16|32|64)m")>;
1072
// Load-folded PCMPGTQ (SSE and VEX forms): port 0 + load, latency 11.
1073def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> {
1074  let Latency = 11;
1075  let NumMicroOps = 2;
1076  let ReleaseAtCycles = [1,1];
1077}
1078def: InstRW<[SBWriteResGroup104], (instregex "(V?)PCMPGTQrm")>;
1079
// x87 compare with an integer memory operand: 2 uops on port 1 + load,
// latency 11.
1080def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> {
1081  let Latency = 11;
1082  let NumMicroOps = 3;
1083  let ReleaseAtCycles = [2,1];
1084}
1085def: InstRW<[SBWriteResGroup106], (instregex "FICOM(P?)(16|32)m")>;
1086
// Memory-destination rotate-through-carry (every RCL/RCR opcode whose name
// matches the prefix regexes below): 11 uops, 7 on SBPort05 and 4 on SBPort23.
1087def SBWriteResGroup108 : SchedWriteRes<[SBPort05,SBPort23]> {
1088  let Latency = 11;
1089  let NumMicroOps = 11;
1090  let ReleaseAtCycles = [7,4];
1091}
1092def: InstRW<[SBWriteResGroup108], (instregex "RCL(8|16|32|64)m",
1093                                             "RCR(8|16|32|64)m")>;
1094
// x87 multiply with an FP memory operand: port 0 + load, latency 12.
1095def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
1096  let Latency = 12;
1097  let NumMicroOps = 2;
1098  let ReleaseAtCycles = [1,1];
1099}
1100def: InstRW<[SBWriteResGroup111], (instregex "MUL_F(32|64)m")>;
1101
// x87 add/sub with an integer memory operand: 2 uops on port 1 + load,
// latency 13.
1102def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> {
1103  let Latency = 13;
1104  let NumMicroOps = 3;
1105  let ReleaseAtCycles = [2,1];
1106}
1107def: InstRW<[SBWriteResGroup114], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
1108
// x87 multiply with an integer memory operand: port 0 + port 1 + load,
// latency 15.
1109def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
1110  let Latency = 15;
1111  let NumMicroOps = 3;
1112  let ReleaseAtCycles = [1,1,1];
1113}
1114def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI(16|32)m")>;
1115
// x87 divide with an FP memory operand: port 0 + load, latency 31.
1116def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> {
1117  let Latency = 31;
1118  let NumMicroOps = 2;
1119  let ReleaseAtCycles = [1,1];
1120}
1121def: InstRW<[SBWriteResGroup130], (instregex "DIV(R?)_F(32|64)m")>;
1122
// x87 divide with an integer memory operand: port 0 + port 1 + load,
// latency 34.
1123def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
1124  let Latency = 34;
1125  let NumMicroOps = 3;
1126  let ReleaseAtCycles = [1,1,1];
1127}
1128def: InstRW<[SBWriteResGroup131], (instregex "DIV(R?)_FI(16|32)m")>;
1129
// VZEROALL: 20 uops, latency 9. Only port 5 is listed as a consumed resource,
// and it is held for 2 cycles.
1130def SBWriteResGroupVzeroall : SchedWriteRes<[SBPort5]> {
1131  let Latency = 9;
1132  let NumMicroOps = 20;
1133  let ReleaseAtCycles = [2];
1134}
1135def: InstRW<[SBWriteResGroupVzeroall], (instrs VZEROALL)>;
1136
// VZEROUPPER: 4 uops, latency 1, with no execution resources listed.
1137def SBWriteResGroupVzeroupper : SchedWriteRes<[]> {
1138  let Latency = 1;
1139  let NumMicroOps = 4;
1140  let ReleaseAtCycles = [];
1141}
1142def: InstRW<[SBWriteResGroupVzeroupper], (instrs VZEROUPPER)>;
1143
// CLC uses the generic WriteZero write.
1144def: InstRW<[WriteZero], (instrs CLC)>;
1145
1146// Instruction variants handled by the renamer. These might not need execution
1147// ports in certain conditions.
1148// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
1149// section "Sandy Bridge and Ivy Bridge Pipeline" > "Register allocation and
1150// renaming".
1151// These can be investigated with llvm-exegesis, e.g.
1152// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
1153// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
1154
// Write used when an instruction is recognized as a zero idiom: latency 0 and
// no execution resources.
1155def SBWriteZeroLatency : SchedWriteRes<[]> {
1156  let Latency = 0;
1157}
1158
// Each SchedWriteVariant below has the same shape: if ZeroIdiomPredicate
// matches, use SBWriteZeroLatency; otherwise (NoSchedPred) fall back to the
// write the instruction would normally use.

// GPR SUB/XOR, 32- and 64-bit register forms; fallback is WriteALU.
1159def SBWriteZeroIdiom : SchedWriteVariant<[
1160    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
1161    SchedVar<NoSchedPred,                          [WriteALU]>
1162]>;
1163def : InstRW<[SBWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
1164                                         XOR32rr, XOR64rr)>;
1165
// 128-bit FP XOR (SSE and VEX forms); fallback is WriteFLogic.
1166def SBWriteFZeroIdiom : SchedWriteVariant<[
1167    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
1168    SchedVar<NoSchedPred,                          [WriteFLogic]>
1169]>;
1170def : InstRW<[SBWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
1171                                          VXORPDrr)>;
1172
// 256-bit FP XOR; fallback is WriteFLogicY.
1173def SBWriteFZeroIdiomY : SchedWriteVariant<[
1174    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
1175    SchedVar<NoSchedPred,                          [WriteFLogicY]>
1176]>;
1177def : InstRW<[SBWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
1178
// 128-bit integer XOR; fallback is WriteVecLogicX.
1179def SBWriteVZeroIdiomLogicX : SchedWriteVariant<[
1180    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
1181    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
1182]>;
1183def : InstRW<[SBWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
1184
// 128-bit integer subtract and compare-greater-than (byte/word/dword, plus
// qword subtract); fallback is WriteVecALUX.
1185def SBWriteVZeroIdiomALUX : SchedWriteVariant<[
1186    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
1187    SchedVar<NoSchedPred,                          [WriteVecALUX]>
1188]>;
1189def : InstRW<[SBWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
1190                                              PSUBDrr, VPSUBDrr,
1191                                              PSUBQrr, VPSUBQrr,
1192                                              PSUBWrr, VPSUBWrr,
1193                                              PCMPGTBrr, VPCMPGTBrr,
1194                                              PCMPGTDrr, VPCMPGTDrr,
1195                                              PCMPGTWrr, VPCMPGTWrr)>;
1196
// PCMPGTQ needs its own fallback write: a single port-0 uop with latency 5.
1197def SBWritePCMPGTQ : SchedWriteRes<[SBPort0]> {
1198  let Latency = 5;
1199  let NumMicroOps = 1;
1200  let ReleaseAtCycles = [1];
1201}
1202
1203def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
1204    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
1205    SchedVar<NoSchedPred,                          [SBWritePCMPGTQ]>
1206]>;
1207def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>;
1208
1209// CMOVs that use both Z and C flag require an extra uop.
// Register form for the CMOVA/CMOVBE case: 3 uops (2 on SBPort05, 1 on
// SBPort015), latency 3.
1210def SBWriteCMOVA_CMOVBErr : SchedWriteRes<[SBPort05,SBPort015]> {
1211  let Latency = 3;
1212  let ReleaseAtCycles = [2,1];
1213  let NumMicroOps = 3;
1214}
1215
// Load-folded form: the register model plus one load uop; latency 8.
1216def SBWriteCMOVA_CMOVBErm : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> {
1217  let Latency = 8;
1218  let ReleaseAtCycles = [1,2,1];
1219  let NumMicroOps = 4;
1220}
1221
// Variants: pick the heavier write when the CMOVA/CMOVBE predicate matches,
// otherwise fall back to the generic WriteCMOV (or its folded-load form).
1222def SBCMOVA_CMOVBErr :  SchedWriteVariant<[
1223  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SBWriteCMOVA_CMOVBErr]>,
1224  SchedVar<NoSchedPred,                             [WriteCMOV]>
1225]>;
1226
1227def SBCMOVA_CMOVBErm :  SchedWriteVariant<[
1228  SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SBWriteCMOVA_CMOVBErm]>,
1229  SchedVar<NoSchedPred,                             [WriteCMOV.Folded]>
1230]>;
1231
// Attach the variants to the 16/32/64-bit CMOV opcodes.
1232def : InstRW<[SBCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
1233def : InstRW<[SBCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
1234
1235// SETCCs that use both Z and C flag require an extra uop.
// Register form for the SETA/SETBE case: 2 uops on SBPort05, latency 2.
1236def SBWriteSETA_SETBEr : SchedWriteRes<[SBPort05]> {
1237  let Latency = 2;
1238  let ReleaseAtCycles = [2];
1239  let NumMicroOps = 2;
1240}
1241
// Store form: adds one uop each on port 4 and SBPort23 (4 uops), latency 3.
1242def SBWriteSETA_SETBEm : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
1243  let Latency = 3;
1244  let ReleaseAtCycles = [1,1,2];
1245  let NumMicroOps = 4;
1246}
1247
// Variants: pick the heavier write when the SETA/SETBE predicate matches,
// otherwise fall back to the generic WriteSETCC / WriteSETCCStore.
1248def SBSETA_SETBErr :  SchedWriteVariant<[
1249  SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SBWriteSETA_SETBEr]>,
1250  SchedVar<NoSchedPred,                         [WriteSETCC]>
1251]>;
1252
1253def SBSETA_SETBErm :  SchedWriteVariant<[
1254  SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SBWriteSETA_SETBEm]>,
1255  SchedVar<NoSchedPred,                         [WriteSETCCStore]>
1256]>;
1257
// Attach the variants to the register and memory SETCC opcodes.
1258def : InstRW<[SBSETA_SETBErr], (instrs SETCCr)>;
1259def : InstRW<[SBSETA_SETBErm], (instrs SETCCm)>;
1260
1261///////////////////////////////////////////////////////////////////////////////
1262// Dependency breaking instructions.
1263///////////////////////////////////////////////////////////////////////////////
1264
// Lists the opcodes treated as zero idioms when ZeroIdiomPredicate holds. The
// opcode set matches the 128-bit and GPR instructions given zero-idiom
// SchedWriteVariants earlier in this file; the 256-bit VXORPSYrr/VXORPDYrr
// forms are not listed here.
1265def : IsZeroIdiomFunction<[
1266  // GPR Zero-idioms.
1267  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
1268
1269  // SSE Zero-idioms.
1270  DepBreakingClass<[
1271    // fp variants.
1272    XORPSrr, XORPDrr,
1273
1274    // int variants.
1275    PXORrr,
1276    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
1277    PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
1278  ], ZeroIdiomPredicate>,
1279
1280  // AVX Zero-idioms.
1281  DepBreakingClass<[
1282    // xmm fp variants.
1283    VXORPSrr, VXORPDrr,
1284
1285    // xmm int variants.
1286    VPXORrr,
1287    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
1288    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
1289  ], ZeroIdiomPredicate>,
1290]>;
1291
1292} // SchedModel
1293