//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Sandy Bridge to support instruction
// scheduling and other instruction cost heuristics.
//
// Note that we define some instructions here that are not supported by SNB,
// but we still have to define them because SNB is the default subtarget for
// X86. These instructions are tagged with a comment `Unsupported = 1`.
//
//===----------------------------------------------------------------------===//

def SandyBridgeModel : SchedMachineModel {
  // All x86 instructions are modeled as a single micro-op, and SB can decode 4
  // instructions per cycle.
  // FIXME: Identify instructions that aren't a single fused micro-op.
  let IssueWidth = 4;
  let MicroOpBufferSize = 168; // Based on the reorder buffer.
  let LoadLatency = 5;
  let MispredictPenalty = 16;

  // Based on the LSD (loop-stream detector) queue size.
  let LoopMicroOpBufferSize = 28;

  // This flag is set to allow the scheduler to assign
  // a default model to unrecognized opcodes.
  let CompleteModel = 0;
}

let SchedModel = SandyBridgeModel in {

// Sandy Bridge can issue micro-ops to 6 different ports in one cycle.

// Ports 0, 1, and 5 handle all computation.
def SBPort0 : ProcResource<1>;
def SBPort1 : ProcResource<1>;
def SBPort5 : ProcResource<1>;

// Ports 2 and 3 are identical. They handle loads and the address half of
// stores.
def SBPort23 : ProcResource<2>;

// Port 4 gets the data half of stores. Store data can be available later than
// the store address, but since we don't model the latency of stores, we can
// ignore that.
def SBPort4 : ProcResource<1>;

// Many micro-ops are capable of issuing on multiple ports.
def SBPort01  : ProcResGroup<[SBPort0, SBPort1]>;
def SBPort05  : ProcResGroup<[SBPort0, SBPort5]>;
def SBPort15  : ProcResGroup<[SBPort1, SBPort5]>;
def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;

// 54 Entry Unified Scheduler
def SBPortAny : ProcResGroup<[SBPort0, SBPort1, SBPort23, SBPort4, SBPort5]> {
  let BufferSize=54;
}

// Integer division issued on port 0.
def SBDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def SBFPDivider : ProcResource<1>;

// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;

// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
// until 5/6/7 cycles after the memory operand.
def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 6>;
def : ReadAdvance<ReadAfterVecYLd, 7>;

def : ReadAdvance<ReadInt2Fpu, 0>;

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
//
// Parameters:
//   SchedRW  - the foldable write; SchedRW.Folded names its load-folded twin.
//   ExePorts - resources consumed by the register variant.
//   Lat      - latency of the register variant.
//   Res      - ReleaseAtCycles for ExePorts (default [1]).
//   UOps     - micro-op count of the register variant (default 1).
//   LoadLat  - latency added for the folded variant (default 5).
//   LoadUOps - micro-ops added for the folded variant (default 1).
multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts,
                          int Lat, list<int> Res = [1], int UOps = 1,
                          int LoadLat = 5, int LoadUOps = 1> {
  // Register variant is using a single cycle on ExePort.
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ReleaseAtCycles = Res;
    let NumMicroOps = UOps;
  }

  // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
  // the latency (default = 5).
  def : WriteRes<SchedRW.Folded, !listconcat([SBPort23], ExePorts)> {
    let Latency = !add(Lat, LoadLat);
    let ReleaseAtCycles = !listconcat([1], Res);
    let NumMicroOps = !add(UOps, LoadUOps);
  }
}

// A folded store needs a cycle on port 4 for the store data, and an extra port
// 2/3 cycle to recompute the address.
def : WriteRes<WriteRMW, [SBPort23,SBPort4]>;

def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>;
def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
def : WriteRes<WriteMove, [SBPort015]>;

// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def : WriteRes<WriteZero, []>;

// Model the effect of clobbering the read-write mask operand of the GATHER operation.
// Does not cost anything by itself, only has latency, matching that of the WriteLoad.
defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;

// Arithmetic.
// SBWriteResPair arguments after the write name are:
//   ExePorts, Lat, Res = [1], UOps = 1, LoadLat = 5, LoadUOps = 1.
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteADC, [SBPort05,SBPort015], 2, [1,1], 2>;

defm : SBWriteResPair<WriteIMul8, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul16, [SBPort1,SBPort05,SBPort015], 4, [1,1,2], 4>;
defm : X86WriteRes<WriteIMul16Imm, [SBPort1,SBPort015], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
defm : SBWriteResPair<WriteIMul16Reg, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
defm : SBWriteResPair<WriteMULX32, [SBPort1,SBPort05,SBPort015], 3, [1,1,1], 3>;
defm : SBWriteResPair<WriteIMul32Imm, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul32Reg, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>;
defm : SBWriteResPair<WriteMULX64, [SBPort1,SBPort0], 3, [1,1], 2>;
defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>;
// WriteIMulH consumes no resources here; only its latency is modeled. The
// folded-load form adds the model's LoadLatency on top of it.
def SBWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
def : WriteRes<WriteIMulHLd, []> {
  let Latency = !add(SBWriteIMulH.Latency, SandyBridgeModel.LoadLatency);
}

defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>;
defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SBPort1, SBPort05], 2, [1,1], 2>;
defm : X86WriteRes<WriteCMPXCHG, [SBPort05, SBPort015], 5, [1,3], 4>;
defm : X86WriteRes<WriteCMPXCHGRMW, [SBPort015, SBPort5, SBPort23, SBPort4], 8, [1, 2, 2, 1], 6>;

defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;

// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SBPort05, SBPort015], 2, [1, 1], 2>;
defm : X86WriteRes<WriteSHDrrcl, [SBPort05, SBPort015], 4, [3, 1], 4>;
defm : X86WriteRes<WriteSHDmri, [SBPort4,SBPort23,SBPort05,SBPort015], 8, [1, 2, 1, 1], 5>;
defm : X86WriteRes<WriteSHDmrcl, [SBPort4,SBPort23,SBPort05,SBPort015], 10, [1, 2, 3, 1], 7>;

defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
defm : SBWriteResPair<WriteShiftCL, [SBPort05], 3, [3], 3>;
defm : SBWriteResPair<WriteRotate, [SBPort05], 2, [2], 2>;
defm : SBWriteResPair<WriteRotateCL, [SBPort05], 3, [3], 3>;

defm : SBWriteResPair<WriteJump, [SBPort5], 1>;
defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;

defm : SBWriteResPair<WriteCMOV, [SBPort05,SBPort015], 2, [1,1], 2>; // Conditional move.
defm : X86WriteRes<WriteFCMOV, [SBPort5,SBPort05], 3, [2,1], 3>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SBPort05]>; // Setcc.
def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
  let Latency = 2;
  let NumMicroOps = 3;
}

defm : X86WriteRes<WriteLAHFSAHF, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteBitTest, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestImmLd, [SBPort05,SBPort23], 6, [1,1], 2>;
//defm : X86WriteRes<WriteBitTestRegLd, [SBPort05,SBPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteBitTestSet, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestSetImmLd, [SBPort05,SBPort23], 6, [1,1], 3>;
defm : X86WriteRes<WriteBitTestSetRegLd, [SBPort05,SBPort23,SBPort5,SBPort015], 8, [1,1,1,1], 5>;

// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
// the port to read all inputs. We don't model that.
def : WriteRes<WriteLEA, [SBPort01]>;

// Bit counts.
defm : SBWriteResPair<WriteBSF, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteBSR, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteLZCNT, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteTZCNT, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WritePOPCNT, [SBPort1], 3, [1], 1, 6>;

// BMI1 BEXTR/BLS, BMI2 BZHI
// NOTE: These don't exist on Sandy Bridge. Ports are guesses.
defm : SBWriteResPair<WriteBEXTR, [SBPort05,SBPort1], 2, [1,1], 2>;
defm : SBWriteResPair<WriteBLS, [SBPort015], 1>;
defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>;

// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [SBPort0,SBPort5], 1, [1,1], 2>;
defm : X86WriteRes<WriteFLDC, [SBPort0,SBPort1], 1, [1,1], 2>;
defm : X86WriteRes<WriteFLoad, [SBPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFLoadX, [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFLoadY, [SBPort23], 7, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
defm : X86WriteRes<WriteFMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
defm : X86WriteRes<WriteFStore, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;

defm : X86WriteRes<WriteFMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteFMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;

// SBWriteResPair arguments after the write name are:
//   ExePorts, Lat, Res = [1], UOps = 1, LoadLat = 5, LoadUOps = 1.
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveZ, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;

defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAddX, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAddY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFAddZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFAdd64, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAdd64X, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAdd64Y, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFAdd64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1

defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmpX, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmpY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFCmpZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFCmp64, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmp64X, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmp64Y, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFCmp64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1

defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
defm : SBWriteResPair<WriteFComX, [SBPort1], 3>;

defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMulX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMulY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WriteFMulZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFMul64, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMul64X, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMul64Y, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WriteFMul64Z, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1

defm : SBWriteResPair<WriteFDiv, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFDivX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFDivY, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
defm : SBWriteResPair<WriteFDivZ, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFDiv64, [SBPort0,SBFPDivider], 22, [1,22], 1, 6>;
defm : SBWriteResPair<WriteFDiv64X, [SBPort0,SBFPDivider], 22, [1,22], 1, 6>;
defm : SBWriteResPair<WriteFDiv64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
defm : SBWriteResPair<WriteFDiv64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>; // Unsupported = 1

defm : SBWriteResPair<WriteFRcp, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRcpX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRcpY, [SBPort0,SBPort05], 7, [2,1], 3, 7>;
defm : SBWriteResPair<WriteFRcpZ, [SBPort0,SBPort05], 7, [2,1], 3, 7>; // Unsupported = 1

defm : SBWriteResPair<WriteFRsqrt, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRsqrtX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRsqrtY, [SBPort0,SBPort05], 7, [2,1], 3, 7>;
defm : SBWriteResPair<WriteFRsqrtZ, [SBPort0,SBPort05], 7, [2,1], 3, 7>; // Unsupported = 1

defm : SBWriteResPair<WriteFSqrt, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFSqrtX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFSqrtY, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
defm : SBWriteResPair<WriteFSqrtZ, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFSqrt64, [SBPort0,SBFPDivider], 21, [1,21], 1, 6>;
defm : SBWriteResPair<WriteFSqrt64X, [SBPort0,SBFPDivider], 21, [1,21], 1, 6>;
defm : SBWriteResPair<WriteFSqrt64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
defm : SBWriteResPair<WriteFSqrt64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFSqrt80, [SBPort0,SBFPDivider], 24, [1,24], 1, 6>;

defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>;
defm : X86WriteRes<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4>;
defm : X86WriteRes<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4>;
defm : X86WriteRes<WriteDPPSLd, [SBPort0,SBPort1,SBPort5,SBPort23], 18, [1,2,2,1], 6>;
defm : X86WriteRes<WriteDPPSYLd, [SBPort0,SBPort1,SBPort5,SBPort23], 19, [1,2,2,1], 6>;
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFRndZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFLogicZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFTest, [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFTestY, [SBPort0], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFTestZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFShuffleY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFShuffleZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFVarShuffleY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarShuffleZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFBlendZ, [SBPort05], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteFVarBlendY, [SBPort05], 2, [2], 2, 7>;
defm : SBWriteResPair<WriteFVarBlendZ, [SBPort05], 2, [2], 2, 7>; // Unsupported = 1

// Conversion between integer and float.
defm : SBWriteResPair<WriteCvtSS2I, [SBPort0,SBPort1], 5, [1,1], 2>;
defm : SBWriteResPair<WriteCvtPS2I, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteCvtPS2IY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteCvtPS2IZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtSD2I, [SBPort0,SBPort1], 5, [1,1], 2>;
defm : SBWriteResPair<WriteCvtPD2I, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : X86WriteRes<WriteCvtPD2IY, [SBPort1,SBPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtPD2IZ, [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPD2IYLd, [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPD2IZLd, [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>; // Unsupported = 1

defm : X86WriteRes<WriteCvtI2SS, [SBPort1,SBPort5], 5, [1,2], 3>;
defm : X86WriteRes<WriteCvtI2SSLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
defm : SBWriteResPair<WriteCvtI2PS, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteCvtI2PSY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteCvtI2PSZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SD, [SBPort1,SBPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PD, [SBPort1,SBPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PDY, [SBPort1,SBPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PDZ, [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SDLd, [SBPort1,SBPort23], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PDLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtI2PDYLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtI2PDZLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; // Unsupported = 1

defm : X86WriteRes<WriteCvtSS2SD, [SBPort0,SBPort5], 1, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PD, [SBPort0,SBPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDY, [SBPort0,SBPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDZ, [SBPort0,SBPort5], 2, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtSS2SDLd, [SBPort0,SBPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDLd, [SBPort0,SBPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort5,SBPort23], 7, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort5,SBPort23], 7, [1,1,1], 3>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtSD2SS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
defm : SBWriteResPair<WriteCvtPD2PSZ, [SBPort1,SBPort5], 4, [1,1], 2, 7>; // Unsupported = 1

// F16C Instructions (IvyBridge+)
defm : X86WriteRes<WriteCvtPH2PS, [SBPort0,SBPort5], 3, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SBPort0,SBPort5], 3, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ, [SBPort0,SBPort5], 3, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPH2PSLd, [SBPort0,SBPort23], 8, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SBPort0,SBPort5,SBPort23], 8, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [SBPort0,SBPort5,SBPort23], 8, [1,1,1], 3>; // Unsupported = 1

defm : X86WriteRes<WriteCvtPS2PH, [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPS2PHY, [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPS2PHZ, [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>; // Unsupported = 1

// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SBPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [SBPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SBPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveZ, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [SBPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SBPort5], 1, [1], 1>;

defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVecLogicX, [SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY, [SBPort015], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecLogicZ, [SBPort015], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecTest, [SBPort0,SBPort5], 2, [1,1], 2, 6>;
defm : SBWriteResPair<WriteVecTestY, [SBPort0,SBPort5], 2, [1,1], 2, 7>;
defm : SBWriteResPair<WriteVecTestZ, [SBPort0,SBPort5], 2, [1,1], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecALU, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteVecALUX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecALUY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecALUZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5, [1], 1, 5>;
defm : SBWriteResPair<WriteVecIMulX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WriteVecIMulZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePMULLDY, [SBPort0], 5, [1], 1, 7>; // TODO this is probably wrong for 256/512-bit for the "generic" model
defm : SBWriteResPair<WritePMULLDZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteShuffleX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteShuffleY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteShuffleZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVarShuffleX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVarShuffleY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarShuffleZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteBlend, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteBlendY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteBlendZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteVarBlendY, [SBPort15], 2, [2], 2, 7>;
defm : SBWriteResPair<WriteVarBlendZ, [SBPort15], 2, [2], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
defm : SBWriteResPair<WriteMPSADY, [SBPort0, SBPort15], 7, [1,2], 3, 7>;
defm : SBWriteResPair<WriteMPSADZ, [SBPort0, SBPort15], 7, [1,2], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePSADBW, [SBPort0], 5, [1], 1, 5>;
defm : SBWriteResPair<WritePSADBWX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WritePSADBWZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePHMINPOS, [SBPort0], 5, [1], 1, 6>;

// Vector integer shifts.
defm : SBWriteResPair<WriteVecShift, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVecShiftX, [SBPort0,SBPort15], 2, [1,1], 2, 6>;
defm : SBWriteResPair<WriteVecShiftY, [SBPort0,SBPort15], 4, [1,1], 2, 7>;
defm : SBWriteResPair<WriteVecShiftZ, [SBPort0,SBPort15], 4, [1,1], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecShiftImm, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVecShiftImmX, [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecShiftImmY, [SBPort0], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecShiftImmZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVarVecShiftY, [SBPort0], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarVecShiftZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1

// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> {
  let Latency = 2;
  let NumMicroOps = 2;
}
def : WriteRes<WriteVecInsertLd, [SBPort23,SBPort15]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def : WriteRes<WriteVecExtract, [SBPort0,SBPort15]> {
  let Latency = 3;
  let NumMicroOps = 2;
}
def : WriteRes<WriteVecExtractSt, [SBPort4,SBPort23,SBPort15]> {
  let Latency = 5;
  let NumMicroOps = 3;
}

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

// SBWriteResPair arguments after the write name are:
//   ExePorts, Lat, Res = [1], UOps = 1, LoadLat = 5, LoadUOps = 1.
defm : SBWriteResPair<WriteFHAdd, [SBPort1,SBPort5], 5, [1,2], 3, 6>;
defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>;
defm : SBWriteResPair<WriteFHAddZ, [SBPort1,SBPort5], 5, [1,2], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePHAdd, [SBPort15], 3, [3], 3, 5>;
defm : SBWriteResPair<WritePHAddX, [SBPort15], 3, [3], 3, 6>;
defm : SBWriteResPair<WritePHAddY, [SBPort15], 3, [3], 3, 7>;
defm : SBWriteResPair<WritePHAddZ, [SBPort15], 3, [3], 3, 7>; // Unsupported = 1

////////////////////////////////////////////////////////////////////////////////
// String instructions.
////////////////////////////////////////////////////////////////////////////////

// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SBPort0]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [SBPort0, SBPort23]> {
  let Latency = 17;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3,1];
}

// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SBPort015]> {
  let Latency = 11;
  let ReleaseAtCycles = [8];
}
def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> {
  let Latency = 17;
  let ReleaseAtCycles = [7, 1];
}

// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SBPort0]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [SBPort0,SBPort23]> {
  let Latency = 17;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3,1];
}

// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SBPort015]> {
  let Latency = 4;
  let ReleaseAtCycles = [8];
}
def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
  let Latency = 10;
  let ReleaseAtCycles = [7, 1];
}

// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SBPort0]> { let Latency = 2; }
def : WriteRes<WriteVecMOVMSK, [SBPort0]> { let Latency = 2; }
def : WriteRes<WriteVecMOVMSKY, [SBPort0]> { let Latency = 2; }
def : WriteRes<WriteMMXMOVMSK, [SBPort0]> { let Latency = 1; }

// AES Instructions.
def : WriteRes<WriteAESDecEnc, [SBPort5,SBPort015]> {
  let Latency = 7;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def : WriteRes<WriteAESDecEncLd, [SBPort5,SBPort23,SBPort015]> {
  let Latency = 13;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}

def : WriteRes<WriteAESIMC, [SBPort5]> {
  let Latency = 12;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [SBPort5,SBPort23]> {
  let Latency = 18;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}

def : WriteRes<WriteAESKeyGen, [SBPort015]> {
  let Latency = 8;
  let ReleaseAtCycles = [11];
}
def : WriteRes<WriteAESKeyGenLd, [SBPort015, SBPort23]> {
  let Latency = 14;
  let ReleaseAtCycles = [10, 1];
}

// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SBPort015]> {
  let Latency = 14;
  let ReleaseAtCycles = [18];
}
def : WriteRes<WriteCLMulLd, [SBPort015, SBPort23]> {
  let Latency = 20;
  let ReleaseAtCycles = [17, 1];
}

// Load/store MXCSR.
// FIXME: This is probably wrong. Only STMXCSR should require Port4.
def : WriteRes<WriteLDMXCSR, [SBPort0,SBPort4,SBPort5,SBPort23]> {
  let Latency = 5;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [1,1,1,1];
}
def : WriteRes<WriteSTMXCSR, [SBPort0,SBPort4,SBPort5,SBPort23]> {
  let Latency = 5;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [1,1,1,1];
}

def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
def : WriteRes<WriteFence, [SBPort23, SBPort4]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def : WriteRes<WriteNop, []>;

// AVX2/FMA is not supported on that architecture, but we should define the basic
// scheduling resources anyway.
// SBWriteResPair arguments after the write name are:
//   ExePorts, Lat, Res = [1], UOps = 1, LoadLat = 5, LoadUOps = 1.
defm : SBWriteResPair<WriteFShuffle256, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarShuffle256, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteShuffle256, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVPMOV256, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarShuffle256, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFMA, [SBPort01], 5>;
defm : SBWriteResPair<WriteFMAX, [SBPort01], 5>;
defm : SBWriteResPair<WriteFMAY, [SBPort01], 5>;
defm : SBWriteResPair<WriteFMAZ, [SBPort01], 5>; // Unsupported = 1

// Remaining SNB instrs.
// Each SBWriteResGroupN below is a named SchedWriteRes that the following
// InstRW binds to an explicit instruction list or regex.

def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SBWriteResGroup1], (instrs COMP_FST0r,
                                        COM_FST0r,
                                        UCOM_FPr,
                                        UCOM_Fr)>;

def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SBWriteResGroup2], (instrs FDECSTP, FINCSTP, FFREE, FFREEP, FNOP,
                                        LD_Frr, ST_Frr, ST_FPrr)>;
def: InstRW<[SBWriteResGroup2], (instrs RET64)>;

def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>;

def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr,
                                        MMX_PABSDrr,
                                        MMX_PABSWrr,
                                        MMX_PADDQrr,
                                        MMX_PALIGNRrri,
                                        MMX_PSIGNBrr,
                                        MMX_PSIGNDrr,
                                        MMX_PSIGNWrr,
                                        MMX_PSUBQrr)>;

def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def: InstRW<[SBWriteResGroup11], (instrs SCASB,
                                         SCASL,
                                         SCASQ,
                                         SCASW)>;

def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup12], (instregex "(V?)(U?)COMI(SD|SS)rr")>;

def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup15], (instrs CWD,
                                         FNSTSW16r)>;

def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ,
                                         MMX_MOVDQ2Qrr)>;

def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
  let Latency = 3;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SBWriteResGroup21], (instrs PUSHFS64)>;

def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
  let Latency = 3;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrri")>;

def SBWriteResGroup23 : SchedWriteRes<[SBPort05,SBPort015]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}
def: InstRW<[SBWriteResGroup23], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
                                         RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;

def SBWriteResGroup24 : SchedWriteRes<[SBPort1,SBPort5,SBPort05,SBPort015]> {
  let Latency = 3;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [1,1,4,2];
}
def: InstRW<[SBWriteResGroup24], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;

def SBWriteResGroup24b : SchedWriteRes<[SBPort1,SBPort5,SBPort05,SBPort015]> {
  let Latency = 4;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [1,1,4,2];
}
def: InstRW<[SBWriteResGroup24b], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;

def SBWriteResGroup25_1 : SchedWriteRes<[SBPort23,SBPort015]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,2];
}
def: InstRW<[SBWriteResGroup25_1], (instrs LEAVE, LEAVE64)>;

def SBWriteResGroup26_2 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
  let Latency = 3;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup26_2], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;

def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup29], (instrs MOV64sr)>;

def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
  let Latency = 4;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [1,3];
}
def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>;

def SBWriteResGroup30 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> {
  let Latency = 3;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [1,3,4];
}
def: InstRW<[SBWriteResGroup30], (instrs LOOP)>;

def SBWriteResGroup31 : SchedWriteRes<[SBPort1,SBPort5,SBPort015,SBPort05]> {
  let Latency = 4;
  let NumMicroOps = 12;
  let ReleaseAtCycles = [1,3,6,2];
}
def: InstRW<[SBWriteResGroup31], (instrs LOOPE, LOOPNE)>;

def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> {
  let Latency = 5;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [8];
}
def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)rCL",
                                            "RCR(8|16|32|64)rCL")>;

def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
  let Latency = 5;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SBWriteResGroup33], (instregex "PUSH(16r|32r|64r|64i8)")>;

def SBWriteResGroup35 : SchedWriteRes<[SBPort1,SBPort5]> {
  let Latency = 5;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,2];
}
def: InstRW<[SBWriteResGroup35], (instrs CLI)>;

def SBWriteResGroup35_2 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
  let Latency = 5;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup35_2], (instrs
PUSHGS64)>; 788def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP(16|32|64)m")>; 789 790def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { 791 let Latency = 5; 792 let NumMicroOps = 3; 793 let ReleaseAtCycles = [1,1,1]; 794} 795def: InstRW<[SBWriteResGroup36], (instrs CALL64pcrel32)>; 796def: InstRW<[SBWriteResGroup36], (instregex "CALL(16|32|64)r", 797 "(V?)EXTRACTPSmri")>; 798 799def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { 800 let Latency = 5; 801 let NumMicroOps = 3; 802 let ReleaseAtCycles = [1,1,1]; 803} 804def: InstRW<[SBWriteResGroup40], (instrs STOSB, STOSL, STOSQ, STOSW)>; 805 806def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> { 807 let Latency = 5; 808 let NumMicroOps = 4; 809 let ReleaseAtCycles = [1,3]; 810} 811def: InstRW<[SBWriteResGroup41], (instrs FNINIT)>; 812 813def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> { 814 let Latency = 5; 815 let NumMicroOps = 4; 816 let ReleaseAtCycles = [1,1,1,1]; 817} 818def: InstRW<[SBWriteResGroup45], (instregex "(V?)PEXTR(D|Q)mr", 819 "PUSHF(16|64)")>; 820 821def SBWriteResGroup46 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { 822 let Latency = 5; 823 let NumMicroOps = 4; 824 let ReleaseAtCycles = [1,1,1,1]; 825} 826def: InstRW<[SBWriteResGroup46], (instregex "CLFLUSH")>; 827 828def SBWriteResGroup47 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { 829 let Latency = 5; 830 let NumMicroOps = 5; 831 let ReleaseAtCycles = [1,2,1,1]; 832} 833def: InstRW<[SBWriteResGroup47], (instregex "FXRSTOR")>; 834 835def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> { 836 let Latency = 6; 837 let NumMicroOps = 1; 838 let ReleaseAtCycles = [1]; 839} 840def: InstRW<[SBWriteResGroup48], (instrs VBROADCASTSSrm)>; 841def: InstRW<[SBWriteResGroup48], (instregex "POP(16|32|64)r", 842 "(V?)MOV64toPQIrm", 843 "(V?)MOVDDUPrm", 844 "(V?)MOVDI2PDIrm", 845 "(V?)MOVQI2PQIrm", 846 "(V?)MOVSDrm", 847 "(V?)MOVSHDUPrm", 848 "(V?)MOVSLDUPrm", 849 
"(V?)MOVSSrm")>; 850 851def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> { 852 let Latency = 6; 853 let NumMicroOps = 2; 854 let ReleaseAtCycles = [1,1]; 855} 856def: InstRW<[SBWriteResGroup49], (instrs MOV16sm)>; 857 858def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> { 859 let Latency = 6; 860 let NumMicroOps = 2; 861 let ReleaseAtCycles = [1,1]; 862} 863def: InstRW<[SBWriteResGroup51], (instrs MMX_PABSBrm, 864 MMX_PABSDrm, 865 MMX_PABSWrm, 866 MMX_PALIGNRrmi, 867 MMX_PSIGNBrm, 868 MMX_PSIGNDrm, 869 MMX_PSIGNWrm)>; 870 871def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> { 872 let Latency = 6; 873 let NumMicroOps = 2; 874 let ReleaseAtCycles = [1,1]; 875} 876def: InstRW<[SBWriteResGroup52], (instrs LODSL, LODSQ)>; 877 878def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> { 879 let Latency = 6; 880 let NumMicroOps = 3; 881 let ReleaseAtCycles = [1,2]; 882} 883def: InstRW<[SBWriteResGroup53], (instregex "ST_F(32|64)m", 884 "ST_FP(32|64|80)m")>; 885 886def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> { 887 let Latency = 7; 888 let NumMicroOps = 1; 889 let ReleaseAtCycles = [1]; 890} 891def: InstRW<[SBWriteResGroup54], (instrs VMOVDDUPYrm, 892 VMOVSHDUPYrm, 893 VMOVSLDUPYrm)>; 894 895def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> { 896 let Latency = 7; 897 let NumMicroOps = 2; 898 let ReleaseAtCycles = [1,1]; 899} 900def: InstRW<[SBWriteResGroup58], (instrs VINSERTF128rmi)>; 901 902def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> { 903 let Latency = 7; 904 let NumMicroOps = 2; 905 let ReleaseAtCycles = [1,1]; 906} 907def: InstRW<[SBWriteResGroup59], (instrs MMX_PADDQrm, 908 MMX_PSUBQrm)>; 909 910def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> { 911 let Latency = 7; 912 let NumMicroOps = 3; 913 let ReleaseAtCycles = [2,1]; 914} 915def: InstRW<[SBWriteResGroup62], (instrs VERRm, VERWm)>; 916 917def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> { 918 let Latency = 7; 919 let NumMicroOps = 3; 
920 let ReleaseAtCycles = [1,2]; 921} 922def: InstRW<[SBWriteResGroup63], (instrs LODSB, LODSW)>; 923 924def SBWriteResGroup64 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> { 925 let Latency = 7; 926 let NumMicroOps = 3; 927 let ReleaseAtCycles = [1,1,1]; 928} 929def: InstRW<[SBWriteResGroup64], (instrs FARJMP64m)>; 930 931def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> { 932 let Latency = 7; 933 let NumMicroOps = 4; 934 let ReleaseAtCycles = [1,1,2]; 935} 936def: InstRW<[SBWriteResGroup66], (instrs FNSTSWm)>; 937 938def SBWriteResGroup67 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> { 939 let Latency = 7; 940 let NumMicroOps = 4; 941 let ReleaseAtCycles = [1,2,1]; 942} 943def: InstRW<[SBWriteResGroup67], (instregex "SLDT(16|32|64)r", 944 "STR(16|32|64)r")>; 945 946def SBWriteResGroup68 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { 947 let Latency = 7; 948 let NumMicroOps = 4; 949 let ReleaseAtCycles = [1,1,2]; 950} 951def: InstRW<[SBWriteResGroup68], (instrs FNSTCW16m)>; 952def: InstRW<[SBWriteResGroup68], (instregex "CALL(16|32|64)m")>; 953 954def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> { 955 let Latency = 7; 956 let NumMicroOps = 4; 957 let ReleaseAtCycles = [1,2,1]; 958} 959def: InstRW<[SBWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)", 960 "SHL(8|16|32|64)m(1|i)", 961 "SHR(8|16|32|64)m(1|i)")>; 962 963def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { 964 let Latency = 8; 965 let NumMicroOps = 3; 966 let ReleaseAtCycles = [1,1,1]; 967} 968def: InstRW<[SBWriteResGroup77], (instregex "(V?)(U?)COMI(SD|SS)rm")>; 969 970def SBWriteResGroup81 : SchedWriteRes<[SBPort4, SBPort23, SBPort015]> { 971 let Latency = 6; 972 let NumMicroOps = 3; 973 let ReleaseAtCycles = [1, 2, 1]; 974} 975def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16)B")>; 976 977def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> { 978 let Latency = 8; 979 let NumMicroOps = 5; 980 let ReleaseAtCycles = [2,3]; 981} 982def: 
InstRW<[SBWriteResGroup83], (instrs CMPSB, 983 CMPSL, 984 CMPSQ, 985 CMPSW)>; 986 987def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { 988 let Latency = 8; 989 let NumMicroOps = 5; 990 let ReleaseAtCycles = [1,2,2]; 991} 992def: InstRW<[SBWriteResGroup84], (instrs FLDCW16m)>; 993 994def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> { 995 let Latency = 8; 996 let NumMicroOps = 5; 997 let ReleaseAtCycles = [1,2,2]; 998} 999def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m(1|i)", 1000 "ROR(8|16|32|64)m(1|i)")>; 1001 1002def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { 1003 let Latency = 8; 1004 let NumMicroOps = 5; 1005 let ReleaseAtCycles = [1,2,2]; 1006} 1007def: InstRW<[SBWriteResGroup86], (instrs MOVSB, MOVSL, MOVSQ, MOVSW)>; 1008def: InstRW<[SBWriteResGroup86], (instregex "XADD(8|16|32|64)rm")>; 1009 1010def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { 1011 let Latency = 8; 1012 let NumMicroOps = 5; 1013 let ReleaseAtCycles = [1,1,1,2]; 1014} 1015def: InstRW<[SBWriteResGroup87], (instrs FARCALL64m)>; 1016 1017def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> { 1018 let Latency = 9; 1019 let NumMicroOps = 3; 1020 let ReleaseAtCycles = [1,1,1]; 1021} 1022def: InstRW<[SBWriteResGroup95], (instregex "LD_F(32|64|80)m")>; 1023 1024def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> { 1025 let Latency = 9; 1026 let NumMicroOps = 4; 1027 let ReleaseAtCycles = [1,1,2]; 1028} 1029def: InstRW<[SBWriteResGroup97], (instregex "IST_F(16|32)m", 1030 "IST_FP(16|32|64)m")>; 1031 1032def SBWriteResGroup97_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> { 1033 let Latency = 9; 1034 let NumMicroOps = 6; 1035 let ReleaseAtCycles = [1,2,3]; 1036} 1037def: InstRW<[SBWriteResGroup97_2], (instregex "ROL(8|16|32|64)mCL", 1038 "ROR(8|16|32|64)mCL", 1039 "SAR(8|16|32|64)mCL", 1040 "SHL(8|16|32|64)mCL", 1041 "SHR(8|16|32|64)mCL")>; 1042 1043def SBWriteResGroup98 : 
SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { 1044 let Latency = 9; 1045 let NumMicroOps = 4; 1046 let ReleaseAtCycles = [1,2,3]; 1047} 1048def: SchedAlias<WriteADCRMW, SBWriteResGroup98>; 1049 1050def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> { 1051 let Latency = 9; 1052 let NumMicroOps = 4; 1053 let ReleaseAtCycles = [1,2,2,1]; 1054} 1055def: InstRW<[SBWriteResGroup99, ReadAfterLd], (instrs ADC8mr, ADC16mr, ADC32mr, ADC64mr, 1056 SBB8mr, SBB16mr, SBB32mr, SBB64mr)>; 1057 1058def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort05,SBPort015]> { 1059 let Latency = 9; 1060 let NumMicroOps = 6; 1061 let ReleaseAtCycles = [1,1,2,1,1]; 1062} 1063def : SchedAlias<WriteBitTestRegLd, SBWriteResGroup100>; // TODO - this is incorrect - no RMW 1064 1065def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> { 1066 let Latency = 10; 1067 let NumMicroOps = 2; 1068 let ReleaseAtCycles = [1,1]; 1069} 1070def: InstRW<[SBWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m", 1071 "ILD_F(16|32|64)m")>; 1072 1073def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> { 1074 let Latency = 11; 1075 let NumMicroOps = 2; 1076 let ReleaseAtCycles = [1,1]; 1077} 1078def: InstRW<[SBWriteResGroup104], (instregex "(V?)PCMPGTQrm")>; 1079 1080def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> { 1081 let Latency = 11; 1082 let NumMicroOps = 3; 1083 let ReleaseAtCycles = [2,1]; 1084} 1085def: InstRW<[SBWriteResGroup106], (instregex "FICOM(P?)(16|32)m")>; 1086 1087def SBWriteResGroup108 : SchedWriteRes<[SBPort05,SBPort23]> { 1088 let Latency = 11; 1089 let NumMicroOps = 11; 1090 let ReleaseAtCycles = [7,4]; 1091} 1092def: InstRW<[SBWriteResGroup108], (instregex "RCL(8|16|32|64)m", 1093 "RCR(8|16|32|64)m")>; 1094 1095def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> { 1096 let Latency = 12; 1097 let NumMicroOps = 2; 1098 let ReleaseAtCycles = [1,1]; 1099} 1100def: InstRW<[SBWriteResGroup111], (instregex "MUL_F(32|64)m")>; 

// x87 arithmetic with a memory operand (FP and integer-source forms).
def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> {
  let Latency = 13;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}
def : InstRW<[SBWriteResGroup114], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;

def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
  let Latency = 15;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def : InstRW<[SBWriteResGroup119], (instregex "MUL_FI(16|32)m")>;

def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> {
  let Latency = 31;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def : InstRW<[SBWriteResGroup130], (instregex "DIV(R?)_F(32|64)m")>;

def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
  let Latency = 34;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def : InstRW<[SBWriteResGroup131], (instregex "DIV(R?)_FI(16|32)m")>;

def SBWriteResGroupVzeroall : SchedWriteRes<[SBPort5]> {
  let Latency = 9;
  let NumMicroOps = 20;
  let ReleaseAtCycles = [2];
}
def : InstRW<[SBWriteResGroupVzeroall], (instrs VZEROALL)>;

// VZEROUPPER is modeled with micro-ops but without any port resources.
def SBWriteResGroupVzeroupper : SchedWriteRes<[]> {
  let Latency = 1;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [];
}
def : InstRW<[SBWriteResGroupVzeroupper], (instrs VZEROUPPER)>;

def : InstRW<[WriteZero], (instrs CLC)>;

// Instruction variants handled by the renamer. These might not need execution
// ports in certain conditions.
// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
// section "Sandy Bridge and Ivy Bridge Pipeline" > "Register allocation and
// renaming".
// These can be investigated with llvm-exegesis, e.g.
// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-

// Zero-latency write that consumes no port resources.
def SBWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// Each variant below selects SBWriteZeroLatency when ZeroIdiomPredicate
// matches and otherwise falls back to the regular SchedWrite for the class.

def SBWriteZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [WriteALU]>
]>;
def : InstRW<[SBWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
                                         XOR32rr, XOR64rr)>;

def SBWriteFZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [WriteFLogic]>
]>;
def : InstRW<[SBWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
                                          XORPDrr, VXORPDrr)>;

def SBWriteFZeroIdiomY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [WriteFLogicY]>
]>;
def : InstRW<[SBWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;

def SBWriteVZeroIdiomLogicX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [WriteVecLogicX]>
]>;
def : InstRW<[SBWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;

def SBWriteVZeroIdiomALUX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [WriteVecALUX]>
]>;
def : InstRW<[SBWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
                                              PSUBDrr, VPSUBDrr,
                                              PSUBQrr, VPSUBQrr,
                                              PSUBWrr, VPSUBWrr,
                                              PCMPGTBrr, VPCMPGTBrr,
                                              PCMPGTDrr, VPCMPGTDrr,
                                              PCMPGTWrr, VPCMPGTWrr)>;

// Non-idiom cost of the register form of PCMPGTQ.
def SBWritePCMPGTQ : SchedWriteRes<[SBPort0]> {
  let Latency = 5;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}

def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [SBWritePCMPGTQ]>
]>;
def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>;

// CMOVs that use both Z and C flag require an extra uop.
def SBWriteCMOVA_CMOVBErr : SchedWriteRes<[SBPort05,SBPort015]> {
  let Latency = 3;
  let ReleaseAtCycles = [2,1];
  let NumMicroOps = 3;
}

def SBWriteCMOVA_CMOVBErm : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> {
  let Latency = 8;
  let ReleaseAtCycles = [1,2,1];
  let NumMicroOps = 4;
}

// Use the costlier writes above when the CMOVA/CMOVBE predicate matches;
// every other condition code keeps the generic WriteCMOV cost.
def SBCMOVA_CMOVBErr : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SBWriteCMOVA_CMOVBErr]>,
  SchedVar<NoSchedPred,                             [WriteCMOV]>
]>;

def SBCMOVA_CMOVBErm : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SBWriteCMOVA_CMOVBErm]>,
  SchedVar<NoSchedPred,                             [WriteCMOV.Folded]>
]>;

def : InstRW<[SBCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
def : InstRW<[SBCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;

// SETCCs that use both Z and C flag require an extra uop.
// Writes selected for SETCCr / SETCCm when the IsSETA*_Or_SETBE* predicate
// matches: two SBPort05 cycles, plus the store ports for the memory form.
def SBWriteSETA_SETBEr : SchedWriteRes<[SBPort05]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}

def SBWriteSETA_SETBEm : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
  let Latency = 3;
  let ReleaseAtCycles = [1,1,2];
  let NumMicroOps = 4;
}

// Fall back to the generic WriteSETCC / WriteSETCCStore cost for every other
// condition code.
def SBSETA_SETBErr : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SBWriteSETA_SETBEr]>,
  SchedVar<NoSchedPred,                         [WriteSETCC]>
]>;

def SBSETA_SETBErm : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SBWriteSETA_SETBEm]>,
  SchedVar<NoSchedPred,                         [WriteSETCCStore]>
]>;

def : InstRW<[SBSETA_SETBErr], (instrs SETCCr)>;
def : InstRW<[SBSETA_SETBErm], (instrs SETCCm)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// Opcodes listed here are reported as zero idioms when ZeroIdiomPredicate
// holds. The list is kept in step with the SBWrite*ZeroIdiom* scheduling
// variants defined earlier in this file, which apply the same predicate.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,

  // SSE Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr,

    // int variants.
    PXORrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX Zero-idioms.
  DepBreakingClass<[
    // xmm fp variants.
    VXORPSrr, VXORPDrr,

    // xmm int variants.
    VPXORrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

    // ymm fp variants. SBWriteFZeroIdiomY already gives these zero latency
    // under ZeroIdiomPredicate, so they must be listed here as well.
    VXORPSYrr, VXORPDYrr
  ], ZeroIdiomPredicate>,
]>;

} // SchedModel