//=- X86SchedSkylake.td - X86 Skylake Server Scheduling ------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Skylake Server to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// Top-level scheduling parameters for the Skylake Server machine model.
def SkylakeServerModel : SchedMachineModel {
  // All x86 instructions are modeled as a single micro-op, and Skylake can
  // decode 6 instructions per cycle.
  let IssueWidth = 6;
  let MicroOpBufferSize = 224; // Based on the reorder buffer.
  // Also read back further down (SkylakeServerModel.LoadLatency) to derive the
  // latency of WriteIMulHLd.
  let LoadLatency = 5;
  let MispredictPenalty = 14;

  // Based on the LSD (loop-stream detector) queue size and benchmarking data.
  let LoopMicroOpBufferSize = 50;

  // This flag is set to allow the scheduler to assign a default model to
  // unrecognized opcodes.
  let CompleteModel = 0;
}

// Every definition inside this block is attached to SkylakeServerModel.
let SchedModel = SkylakeServerModel in {

// Skylake Server can issue micro-ops to 8 different ports in one cycle.

// Ports 0, 1, 5, and 6 handle all computation.
// Port 4 gets the data half of stores. Store data can be available later than
// the store address, but since we don't model the latency of stores, we can
// ignore that.
// Ports 2 and 3 are identical. They handle loads and the address half of
// stores. Port 7 can handle address calculations.
// Each port is declared as a resource with a single unit.
def SKXPort0 : ProcResource<1>;
def SKXPort1 : ProcResource<1>;
def SKXPort2 : ProcResource<1>;
def SKXPort3 : ProcResource<1>;
def SKXPort4 : ProcResource<1>;
def SKXPort5 : ProcResource<1>;
def SKXPort6 : ProcResource<1>;
def SKXPort7 : ProcResource<1>;

// Many micro-ops are capable of issuing on multiple ports.
// Port groups. The digits in each group name list its member ports, e.g.
// SKXPort237 is the group {SKXPort2, SKXPort3, SKXPort7}.
def SKXPort01 : ProcResGroup<[SKXPort0, SKXPort1]>;
def SKXPort23 : ProcResGroup<[SKXPort2, SKXPort3]>;
def SKXPort237 : ProcResGroup<[SKXPort2, SKXPort3, SKXPort7]>;
def SKXPort04 : ProcResGroup<[SKXPort0, SKXPort4]>;
def SKXPort05 : ProcResGroup<[SKXPort0, SKXPort5]>;
def SKXPort06 : ProcResGroup<[SKXPort0, SKXPort6]>;
def SKXPort15 : ProcResGroup<[SKXPort1, SKXPort5]>;
def SKXPort16 : ProcResGroup<[SKXPort1, SKXPort6]>;
def SKXPort56 : ProcResGroup<[SKXPort5, SKXPort6]>;
def SKXPort015 : ProcResGroup<[SKXPort0, SKXPort1, SKXPort5]>;
def SKXPort056 : ProcResGroup<[SKXPort0, SKXPort5, SKXPort6]>;
def SKXPort0156: ProcResGroup<[SKXPort0, SKXPort1, SKXPort5, SKXPort6]>;

// The dividers are separate single-unit resources, not members of any port
// group; entries that use them list SKXPort0 alongside the divider explicitly.
def SKXDivider : ProcResource<1>; // Integer division issued on port 0.
// FP division and sqrt on port 0.
def SKXFPDivider : ProcResource<1>;

// 60 Entry Unified Scheduler
// Group of all eight ports; BufferSize carries the 60-entry figure.
def SKXPortAny : ProcResGroup<[SKXPort0, SKXPort1, SKXPort2, SKXPort3, SKXPort4,
                               SKXPort5, SKXPort6, SKXPort7]> {
  let BufferSize=60;
}

// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;

// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
// until 5/6/7 cycles after the memory operand.
def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 6>;
def : ReadAdvance<ReadAfterVecYLd, 7>;

// No read advance for ReadInt2Fpu.
def : ReadAdvance<ReadInt2Fpu, 0>;

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
// Template arguments:
//   SchedRW  - the foldable write; SchedRW.Folded names its load-folded twin.
//   ExePorts - resources used by the register variant.
//   Lat      - latency of the register variant.
//   Res      - ReleaseAtCycles, one entry per ExePorts element (default [1]).
//   UOps     - NumMicroOps of the register variant (default 1).
//   LoadLat  - cycles added to Lat for the folded variant (default 5).
//   LoadUOps - micro-ops added to UOps for the folded variant (default 1).
multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW,
                           list<ProcResourceKind> ExePorts,
                           int Lat, list<int> Res = [1], int UOps = 1,
                           int LoadLat = 5, int LoadUOps = 1> {
  // Register variant is using a single cycle on ExePort.
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ReleaseAtCycles = Res;
    let NumMicroOps = UOps;
  }

  // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
  // the latency (default = 5).
  // SKXPort23 and its single release cycle are prepended, so the two lists
  // stay the same length as long as Res matches ExePorts.
  def : WriteRes<SchedRW.Folded, !listconcat([SKXPort23], ExePorts)> {
    let Latency = !add(Lat, LoadLat);
    let ReleaseAtCycles = !listconcat([1], Res);
    let NumMicroOps = !add(UOps, LoadUOps);
  }
}

// A folded store needs a cycle on port 4 for the store data, and an extra port
// 2/3/7 cycle to recompute the address.
def : WriteRes<WriteRMW, [SKXPort237,SKXPort4]>;

// Arithmetic.
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>; // Integer ALU + flags op.

// Integer multiplication.
// NOTE(review): X86WriteRes is defined outside this file. From its uses here
// the arguments look like <write, ports, latency, release-at-cycles,
// micro-ops>, with no implicit folded-load twin (load forms are listed as
// separate *Ld writes) - confirm against its definition.
defm : SKXWriteResPair<WriteIMul8, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul16, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,2], 4>;
defm : X86WriteRes<WriteIMul16Imm, [SKXPort1,SKXPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
defm : SKXWriteResPair<WriteMULX32, [SKXPort1,SKXPort06,SKXPort0156], 3, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
defm : SKXWriteResPair<WriteMULX64, [SKXPort1,SKXPort5], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
// WriteIMulH lists no resources, only a latency. The record is named so the
// load form below can reuse its Latency.
def SKXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
// Load form: WriteIMulH latency plus the model-wide LoadLatency.
def : WriteRes<WriteIMulHLd, []> {
  let Latency = !add(SKXWriteIMulH.Latency, SkylakeServerModel.LoadLatency);
}

defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
defm : X86WriteRes<WriteCMPXCHG,[SKXPort06, SKXPort0156], 5, [2,3], 5>;
defm : X86WriteRes<WriteCMPXCHGRMW,[SKXPort23,SKXPort06,SKXPort0156,SKXPort237,SKXPort4], 8, [1,2,1,1,1], 6>;
defm : X86WriteRes<WriteXCHG, [SKXPort0156], 2, [3], 3>;

// TODO: Why isn't the SKXDivider used?
// Integer division.
// NOTE(review): SKXDivider is reserved only by WriteDiv8 (both forms),
// WriteDiv16Ld/32Ld/64Ld and WriteIDiv8. The register forms of
// WriteDiv16/32/64 and WriteIDiv16/32/64, and all WriteIDiv*Ld entries, list
// execution ports only.
// WriteDiv8 passes LoadLat = 4 rather than the multiclass default of 5.
defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
defm : X86WriteRes<WriteDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
defm : X86WriteRes<WriteDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
defm : X86WriteRes<WriteDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
defm : X86WriteRes<WriteDiv16Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
defm : X86WriteRes<WriteDiv32Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
defm : X86WriteRes<WriteDiv64Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;

defm : X86WriteRes<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1>;
defm : X86WriteRes<WriteIDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
defm : X86WriteRes<WriteIDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
defm : X86WriteRes<WriteIDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
defm : X86WriteRes<WriteIDiv8Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : X86WriteRes<WriteIDiv16Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : X86WriteRes<WriteIDiv32Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : X86WriteRes<WriteIDiv64Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;

defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;

// Only Latency/NumMicroOps defaults apply here; nothing is overridden.
def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads.

defm : SKXWriteResPair<WriteCMOV, [SKXPort06], 1, [1], 1>; // Conditional move.
defm : X86WriteRes<WriteFCMOV, [SKXPort1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SKXPort06]>; // Setcc.
// Setcc with a store: three micro-ops across ports 0/6, 4 and 2/3/7.
// ReleaseAtCycles is not overridden here.
def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
  let Latency = 2;
  let NumMicroOps = 3;
}
defm : X86WriteRes<WriteLAHFSAHF, [SKXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTest, [SKXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestImmLd, [SKXPort06,SKXPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteBitTestRegLd, [SKXPort0156,SKXPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteBitTestSet, [SKXPort06], 1, [1], 1>;
// NOTE(review): the ImmLd form is given 3 micro-ops but only two resource
// cycles, while the RegLd form below has 2 micro-ops - confirm intended.
defm : X86WriteRes<WriteBitTestSetImmLd, [SKXPort06,SKXPort23], 5, [1,1], 3>;
defm : X86WriteRes<WriteBitTestSetRegLd, [SKXPort0156,SKXPort23], 5, [1,1], 2>;

// Integer shifts and rotates.
// The *CL writes hold port 0/6 for three cycles and issue three micro-ops.
defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
defm : SKXWriteResPair<WriteShiftCL, [SKXPort06], 3, [3], 3>;
defm : SKXWriteResPair<WriteRotate, [SKXPort06], 1, [1], 1>;
defm : SKXWriteResPair<WriteRotateCL, [SKXPort06], 3, [3], 3>;

// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl,[SKXPort1,SKXPort06,SKXPort0156], 6, [1, 2, 1], 4>;
defm : X86WriteRes<WriteSHDmri, [SKXPort1,SKXPort23,SKXPort237,SKXPort0156], 9, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl,[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156], 11, [1, 1, 1, 2, 1], 6>;

// Bit counts.
defm : SKXWriteResPair<WriteBSF, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteBSR, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteLZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteTZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WritePOPCNT, [SKXPort1], 3>;

// BMI1 BEXTR/BLS, BMI2 BZHI
defm : SKXWriteResPair<WriteBEXTR, [SKXPort06,SKXPort15], 2, [1,1], 2>;
defm : SKXWriteResPair<WriteBLS, [SKXPort15], 1>;
defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;

// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
// NOTE(review): WriteStore is given 1 micro-op while WriteStoreNT and the
// FP/vector stores in this file use 2 for the same two ports - confirm.
defm : X86WriteRes<WriteStore, [SKXPort237, SKXPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteStoreNT, [SKXPort237, SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteMove, [SKXPort0156], 1, [1], 1>;

// Model the effect of clobbering the read-write mask operand of the GATHER operation.
// Does not cost anything by itself, only has latency, matching that of the WriteLoad.
// (No ports, no release cycles, zero micro-ops, latency 5.)
defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def : WriteRes<WriteZero, []>;

// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
// NOTE(review): the entry below nevertheless passes Lat = 1.
defm : SKXWriteResPair<WriteJump, [SKXPort06], 1>;

// Floating point. This covers both scalar and vector operations.
defm : X86WriteRes<WriteFLD0, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [SKXPort05], 1, [2], 2>;
defm : X86WriteRes<WriteFLDC, [SKXPort05], 1, [2], 2>;
// Plain FP loads: latency 5/6/7 for the unsuffixed/X/Y writes.
defm : X86WriteRes<WriteFLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFLoadY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
// FP stores: one cycle on the address ports (2/3/7) and one on port 4.
defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;

// Masked FP stores additionally use port 0 and address via ports 2/3 only.
defm : X86WriteRes<WriteFMaskedStore32, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteFMaskedStore32Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteFMaskedStore64, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteFMaskedStore64Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;

defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;

// In the SKXWriteResPair entries below the trailing argument is LoadLat, the
// extra latency of the folded-load variant; the Z writes use ports 0/5
// instead of ports 0/1.
defm : SKXWriteResPair<WriteFAdd, [SKXPort01], 4, [1], 1, 5>; // Floating point add/sub.
defm : SKXWriteResPair<WriteFAddX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFAddY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAddZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAdd64, [SKXPort01], 4, [1], 1, 5>; // Floating point double add/sub.
defm : SKXWriteResPair<WriteFAdd64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFAdd64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAdd64Z, [SKXPort05], 4, [1], 1, 7>;

defm : SKXWriteResPair<WriteFCmp, [SKXPort01], 4, [1], 1, 5>; // Floating point compare.
defm : SKXWriteResPair<WriteFCmpX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmpY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmpZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64, [SKXPort01], 4, [1], 1, 5>; // Floating point double compare.
defm : SKXWriteResPair<WriteFCmp64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64Z, [SKXPort05], 4, [1], 1, 7>;

defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags (X87).
defm : SKXWriteResPair<WriteFComX, [SKXPort0], 2>; // Floating point compare to flags (SSE).
// Floating point multiply, divide and square root.
// Argument order: <write, ports, Lat, ReleaseAtCycles, NumMicroOps, LoadLat>.
defm : SKXWriteResPair<WriteFMul, [SKXPort01], 4, [1], 1, 5>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFMulX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMulY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMulZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMul64, [SKXPort01], 4, [1], 1, 5>; // Floating point double multiplication.
defm : SKXWriteResPair<WriteFMul64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMul64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMul64Z, [SKXPort05], 4, [1], 1, 7>;

// Division occupies SKXFPDivider for several cycles (second/third
// ReleaseAtCycles entry) in addition to one or two cycles on port 0.
// NOTE(review): the "10-14 cycles" remark is repeated on every line, including
// the Z entries whose modeled latency is 18 and 23 - the comment looks stale
// there.
defm : SKXWriteResPair<WriteFDiv, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64, [SKXPort0,SKXFPDivider], 14, [1,4], 1, 5>; // 10-14 cycles. // Floating point division.
defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,4], 1, 6>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,8], 1, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles.

// Square root shares SKXFPDivider with division.
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>;
defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0,SKXFPDivider], 12, [1,6], 1, 7>;
defm : SKXWriteResPair<WriteFSqrtZ, [SKXPort0,SKXPort5,SKXFPDivider], 20, [2,1,12], 3, 7>;
defm : SKXWriteResPair<WriteFSqrt64, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 6>;
defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0,SKXFPDivider], 18, [1,12],1, 7>;
defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0,SKXPort5,SKXFPDivider], 32, [2,1,24], 3, 7>;
// WriteFSqrt80 omits UOps and LoadLat, so it takes the SKXWriteResPair
// defaults (1 micro-op, 5 cycles of load latency).
defm : SKXWriteResPair<WriteFSqrt80, [SKXPort0,SKXFPDivider], 21, [1,7]>; // Floating point long double square root.

defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
defm : SKXWriteResPair<WriteFRcpX, [SKXPort0], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFRcpZ, [SKXPort0,SKXPort5], 4, [2,1], 3, 7>;

defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
defm : SKXWriteResPair<WriteFRsqrtX,[SKXPort0], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>;
// NOTE(review): WriteFRsqrtZ has latency 9 whereas WriteFRcpZ, with the same
// ports and micro-op count, has latency 4 - confirm both figures.
defm : SKXWriteResPair<WriteFRsqrtZ,[SKXPort0,SKXPort5], 9, [2,1], 3, 7>;

defm : SKXWriteResPair<WriteFMA, [SKXPort01], 4, [1], 1, 5>; // Fused Multiply Add.
defm : SKXWriteResPair<WriteFMAX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMAY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMAZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
// WriteDPPS does not go through SKXWriteResPair: its load forms are listed
// explicitly and also take a cycle on port 0/6.
defm : X86WriteRes<WriteDPPS, [SKXPort5,SKXPort01], 13, [1,3], 4>;
defm : X86WriteRes<WriteDPPSY, [SKXPort5,SKXPort01], 13, [1,3], 4>;
defm : X86WriteRes<WriteDPPSLd, [SKXPort5,SKXPort01,SKXPort06,SKXPort23], 19, [1,3,1,1], 6>;
defm : X86WriteRes<WriteDPPSYLd, [SKXPort5,SKXPort01,SKXPort06,SKXPort23], 20, [1,3,1,1], 6>;
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFRnd, [SKXPort01], 8, [2], 2, 6>; // Floating point rounding.
defm : SKXWriteResPair<WriteFRndY, [SKXPort01], 8, [2], 2, 7>;
defm : SKXWriteResPair<WriteFRndZ, [SKXPort05], 8, [2], 2, 7>;
// For the remaining FP entries the last argument (LoadLat) is 6 on the
// unsuffixed write and 7 on the Y and Z writes.
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFLogicZ, [SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFTest, [SKXPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
defm : SKXWriteResPair<WriteFTestY, [SKXPort0], 2, [1], 1, 7>;
defm : SKXWriteResPair<WriteFTestZ, [SKXPort0], 2, [1], 1, 7>;
// All FP shuffles are modeled as a single micro-op on port 5.
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFBlendZ,[SKXPort015], 1, [1], 1, 7>;
// Variable blends take two micro-ops, both on ports 0/1/5.
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>;
defm : SKXWriteResPair<WriteFVarBlendZ,[SKXPort015], 2, [2], 2, 7>;

// FMA Scheduling helper class.
// (Commented out; kept for reference only.)
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }

// Vector integer operations.
// Vector loads: latency 5/6/7 for the unsuffixed/X/Y writes, one cycle on
// port 2/3.
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKXPort23], 7, [1], 1>;
// Non-temporal and masked loads add a second micro-op on ports 0/1/5.
defm : X86WriteRes<WriteVecLoadNT, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecLoadNTY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
// Vector stores: address on ports 2/3/7, data on port 4.
defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
// Masked stores additionally use port 0 and address via ports 2/3 only.
defm : X86WriteRes<WriteVecMaskedStore32, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStore64, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 3>;
// Register-to-register vector moves.
defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveZ, [SKXPort05], 1, [1], 1>;
// Moves to and from general-purpose registers use different ports and
// latencies.
defm : X86WriteRes<WriteVecMoveToGpr, [SKXPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SKXPort5], 1, [1], 1>;

defm : SKXWriteResPair<WriteVecALU, [SKXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecALUZ, [SKXPort0], 1, [1], 1, 7>;
// Argument order throughout: <write, ports, Lat, ReleaseAtCycles,
// NumMicroOps, LoadLat>. LoadLat is 5 on the unsuffixed writes that have an
// X sibling, 6 on X writes and 7 on Y/Z writes.
defm : SKXWriteResPair<WriteVecLogic, [SKXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : SKXWriteResPair<WriteVecLogicX,[SKXPort015], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecLogicZ,[SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecTest, [SKXPort0,SKXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecTestZ, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 5, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 5, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 5, [1], 1, 7>;
// PMULLD is modeled as two micro-ops with twice the WriteVecIMul latency.
defm : SKXWriteResPair<WritePMULLD, [SKXPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKXWriteResPair<WritePMULLDY, [SKXPort01], 10, [2], 2, 7>;
defm : SKXWriteResPair<WritePMULLDZ, [SKXPort05], 10, [2], 2, 7>;
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteShuffleZ, [SKXPort5], 1, [1], 1, 7>;
// The unsuffixed variable shuffle takes two micro-ops (ports 0 and 5).
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort0,SKXPort5], 1, [1,1], 2, 5>; // Vector variable shuffles.
defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteBlendZ,[SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
// NOTE(review): WriteVarBlendY and WriteVarBlendZ pass LoadLat = 6, whereas
// every other Y/Z entry in this file (including WriteFVarBlendY/Z) passes 7.
// WriteVarBlendZ is also the only one of the three with a single micro-op.
// Confirm whether these are measured values or copy/paste slips.
defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>;
defm : SKXWriteResPair<WriteVarBlendZ,[SKXPort05], 2, [1], 1, 6>;
// MPSAD: two micro-ops, both on port 5.
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>;
defm : SKXWriteResPair<WriteMPSADZ, [SKXPort5], 4, [2], 2, 7>;
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 5>; // Vector PSADBW.
defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>;
defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>;
defm : SKXWriteResPair<WritePSADBWZ, [SKXPort5], 3, [1], 1, 7>; // TODO: 512-bit ops require ports 0/1 to be joined.
defm : SKXWriteResPair<WritePHMINPOS, [SKXPort0], 4, [1], 1, 6>; // Vector PHMINPOS.

// Vector integer shifts.
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1, [1], 1, 5>;
// The X/Y/Z shift writes list their load forms explicitly. The register forms
// use port 5 plus a shift port; the load forms replace port 5 with port 2/3.
defm : X86WriteRes<WriteVecShiftX, [SKXPort5,SKXPort01], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftY, [SKXPort5,SKXPort01], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZ, [SKXPort5,SKXPort0], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftXLd, [SKXPort01,SKXPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [SKXPort01,SKXPort23], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZLd, [SKXPort0,SKXPort23], 8, [1,1], 2>;

// Vector integer immediate shifts.
defm : SKXWriteResPair<WriteVecShiftImm, [SKXPort0], 1, [1], 1, 5>;
defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts.
defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecShiftImmZ, [SKXPort0], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarVecShift, [SKXPort01], 1, [1], 1, 6>; // Variable vector shifts.
defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarVecShiftZ, [SKXPort0], 1, [1], 1, 7>;

// Vector insert/extract operations.
// Register insert: two micro-ops, holding port 5 for two cycles.
def : WriteRes<WriteVecInsert, [SKXPort5]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
// Insert from memory: ReleaseAtCycles is not overridden here.
def : WriteRes<WriteVecInsertLd, [SKXPort5,SKXPort23]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
// Instructions whose names match this pattern are scheduled as
// WriteVecInsertLd, overriding whatever write they carry by default.
def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;

def : WriteRes<WriteVecExtract, [SKXPort0,SKXPort5]> {
  let Latency = 3;
  let NumMicroOps = 2;
}
// Extract straight to memory: port 5 plus store data (4) and address (2/3/7).
def : WriteRes<WriteVecExtractSt, [SKXPort4,SKXPort5,SKXPort237]> {
  let Latency = 2;
  let NumMicroOps = 3;
}

// Conversion between integer and float.
defm : SKXWriteResPair<WriteCvtSS2I, [SKXPort01], 6, [2], 2>; // Needs more work: DD vs DQ.
// Float -> integer conversions. Entries that stop after Lat use the
// SKXWriteResPair defaults (Res = [1], 1 micro-op, LoadLat = 5).
defm : SKXWriteResPair<WriteCvtPS2I, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPS2IY, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPS2IZ, [SKXPort05], 3>;
defm : SKXWriteResPair<WriteCvtSD2I, [SKXPort01], 6, [2], 2>;
defm : SKXWriteResPair<WriteCvtPD2I, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPD2IY, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPD2IZ, [SKXPort05], 3>;

// Integer -> float conversions.
defm : SKXWriteResPair<WriteCvtI2SS, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PS, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PSY, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PSZ, [SKXPort05], 4>; // Needs more work: DD vs DQ.
defm : SKXWriteResPair<WriteCvtI2SD, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PD, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PDY, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PDZ, [SKXPort05], 4>;

// Float <-> double conversions.
defm : SKXWriteResPair<WriteCvtSS2SD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort5,SKXPort01], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteCvtPS2PDZ, [SKXPort05], 3, [2], 2>;
defm : SKXWriteResPair<WriteCvtSD2SS, [SKXPort5,SKXPort01], 5, [1,1], 2, 5>;
// NOTE(review): LoadLat = 4 here is lower than any load latency defined in
// this file (5/6/7) - confirm.
defm : SKXWriteResPair<WriteCvtPD2PS, [SKXPort5,SKXPort01], 5, [1,1], 2, 4>;
defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort5,SKXPort01], 7, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteCvtPD2PSZ, [SKXPort5,SKXPort05], 7, [1,1], 2, 7>;

// Half-precision conversions; load and store forms are listed explicitly.
defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort01], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ, [SKXPort5,SKXPort0], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23,SKXPort01], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort01], 10, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [SKXPort23,SKXPort05], 10, [1,1], 2>;

defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5,SKXPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5,SKXPort01], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHZ, [SKXPort5,SKXPort05], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 6, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 8, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort05], 8, [1,1,1,1], 4>;

// String instructions.
// In every pair below the load form adds one SKXPort23 cycle, one micro-op
// and 6 cycles of latency to the register form.

// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SKXPort0]> {
  let Latency = 10;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [SKXPort0, SKXPort23]> {
  let Latency = 16;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3,1];
}

// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SKXPort0, SKXPort5, SKXPort015, SKXPort0156]> {
  let Latency = 19;
  let NumMicroOps = 9;
  let ReleaseAtCycles = [4,3,1,1];
}
def : WriteRes<WritePCmpEStrMLd, [SKXPort0, SKXPort5, SKXPort23, SKXPort015, SKXPort0156]> {
  let Latency = 25;
  let NumMicroOps = 10;
  let ReleaseAtCycles = [4,3,1,1,1];
}

// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SKXPort0]> {
  let Latency = 10;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [SKXPort0, SKXPort23]> {
  let Latency = 16;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3,1];
}

// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SKXPort0,SKXPort5,SKXPort0156]> {
  let Latency = 18;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [4,3,1];
}
def : WriteRes<WritePCmpEStrILd, [SKXPort0, SKXPort5, SKXPort23, SKXPort0156]> {
  let Latency = 24;
  let NumMicroOps = 9;
  let ReleaseAtCycles = [4,3,1,1];
}

// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SKXPort0]> { let Latency = 2; }
def : WriteRes<WriteVecMOVMSK, [SKXPort0]> { let Latency = 2; }
def : WriteRes<WriteVecMOVMSKY, [SKXPort0]> { let Latency = 2; }
def : WriteRes<WriteMMXMOVMSK, [SKXPort0]> { let Latency = 2; }

// AES instructions.
def : WriteRes<WriteAESDecEnc, [SKXPort0]> { // Decryption, encryption.
  let Latency = 4;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def : WriteRes<WriteAESDecEncLd, [SKXPort0, SKXPort23]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}

def : WriteRes<WriteAESIMC, [SKXPort0]> { // InvMixColumn.
  let Latency = 8;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [SKXPort0, SKXPort23]> {
  let Latency = 14;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}

def : WriteRes<WriteAESKeyGen, [SKXPort0,SKXPort5,SKXPort015]> { // Key Generation.
  let Latency = 20;
  let NumMicroOps = 11;
  let ReleaseAtCycles = [3,6,2];
}
// NOTE(review): unlike the other Ld entries in this section, the load form
// keeps the register form's micro-op count (11), adds only 5 cycles of
// latency, and drops one SKXPort015 cycle - confirm.
def : WriteRes<WriteAESKeyGenLd, [SKXPort0,SKXPort5,SKXPort23,SKXPort015]> {
  let Latency = 25;
  let NumMicroOps = 11;
  let ReleaseAtCycles = [3,6,1,1];
}

// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SKXPort5]> {
  let Latency = 6;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def : WriteRes<WriteCLMulLd, [SKXPort5, SKXPort23]> {
  let Latency = 12;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}

// Catch-all for expensive system instructions.
def : WriteRes<WriteSystem, [SKXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;

// AVX2.
defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
// 256-bit shuffle and width-changing move writes. All four use the same
// SKXWriteResPair arguments: SKXPort5 only, with the values 3, [1], 1, 7.
// NOTE(review): the meaning of the trailing positional arguments is fixed by
// the SKXWriteResPair multiclass, which is defined outside this part of the
// file; check there before changing any of them.
defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteVPMOV256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width packed vector width-changing move.
defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.

// Old microcoded instructions that nobody uses.
def : WriteRes<WriteMicrocoded, [SKXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;

// Fence instructions.
def : WriteRes<WriteFence, [SKXPort23, SKXPort4]> { let NumMicroOps = 2; let ReleaseAtCycles = [1,1]; }

// Load/store MXCSR.
def : WriteRes<WriteLDMXCSR, [SKXPort0,SKXPort23,SKXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }
def : WriteRes<WriteSTMXCSR, [SKXPort4,SKXPort5,SKXPort237]> { let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1,1,1]; }

// Nop, not very useful except that it provides a model for nops!
// The resource list is empty, so a nop is not bound to any SKXPort.
def : WriteRes<WriteNop, []>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

// Every entry lists SKXPort5 first with 2 release cycles, followed by one
// cycle on a port group that differs per write (SKXPort01, SKXPort05 or
// SKXPort015).
defm : SKXWriteResPair<WriteFHAdd, [SKXPort5,SKXPort01], 6, [2,1], 3, 6>;
defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort01], 6, [2,1], 3, 7>;
defm : SKXWriteResPair<WritePHAdd, [SKXPort5,SKXPort05], 3, [2,1], 3, 5>;
defm : SKXWriteResPair<WritePHAddX, [SKXPort5,SKXPort015], 3, [2,1], 3, 6>;
defm : SKXWriteResPair<WritePHAddY, [SKXPort5,SKXPort015], 3, [2,1], 3, 7>;

// Remaining instrs.

// The groups below all describe a single micro-op with Latency = 1 and one
// release cycle; they differ only in the port (or port group) they occupy.
// Each SchedWriteRes is attached to concrete opcodes by the InstRW record that
// follows it, either by regular expression (instregex) or by exact name
// (instrs).

// SKXPort0 only.
def SKXWriteResGroup1 : SchedWriteRes<[SKXPort0]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup1], (instregex "KAND(B|D|Q|W)kk",
                                            "KANDN(B|D|Q|W)kk",
                                            "KMOV(B|D|Q|W)kk",
                                            "KNOT(B|D|Q|W)kk",
                                            "KOR(B|D|Q|W)kk",
                                            "KXNOR(B|D|Q|W)kk",
                                            "KXOR(B|D|Q|W)kk",
                                            "KSET0(B|D|Q|W)", // Same as KXOR
                                            "KSET1(B|D|Q|W)", // Same as KXNOR
                                            "MMX_PADDS(B|W)rr",
                                            "MMX_PADDUS(B|W)rr",
                                            "MMX_PAVG(B|W)rr",
                                            "MMX_PCMPEQ(B|D|W)rr",
                                            "MMX_PCMPGT(B|D|W)rr",
                                            "MMX_P(MAX|MIN)SWrr",
                                            "MMX_P(MAX|MIN)UBrr",
                                            "MMX_PSUBS(B|W)rr",
                                            "MMX_PSUBUS(B|W)rr",
                                            "VPMOVB2M(Z|Z128|Z256)kr",
                                            "VPMOVD2M(Z|Z128|Z256)kr",
                                            "VPMOVQ2M(Z|Z128|Z256)kr",
                                            "VPMOVW2M(Z|Z128|Z256)kr")>;

// SKXPort5 only.
def SKXWriteResGroup3 : SchedWriteRes<[SKXPort5]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup3], (instregex "COM(P?)_FST0r",
                                            "KMOV(B|D|Q|W)kr",
                                            "UCOM_F(P?)r")>;

// SKXPort6 only: register-indirect jumps.
def SKXWriteResGroup4 : SchedWriteRes<[SKXPort6]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup4], (instregex "JMP(16|32|64)r")>;

// Either SKXPort0 or SKXPort5.
def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup6], (instrs FINCSTP, FNOP)>;

// Either SKXPort0 or SKXPort6.
def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;

// Either SKXPort1 or SKXPort5.
def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr")>;

// Any of SKXPort0, SKXPort1 or SKXPort5.
def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
696def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr", 697 "VBLENDMPS(Z128|Z256)rr", 698 "VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr", 699 "(V?)PADD(B|D|Q|W)rr", 700 "VPBLENDD(Y?)rri", 701 "VPBLENDMB(Z128|Z256)rr", 702 "VPBLENDMD(Z128|Z256)rr", 703 "VPBLENDMQ(Z128|Z256)rr", 704 "VPBLENDMW(Z128|Z256)rr", 705 "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk", 706 "VPTERNLOGD(Z|Z128|Z256)rri", 707 "VPTERNLOGQ(Z|Z128|Z256)rri")>; 708 709def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> { 710 let Latency = 1; 711 let NumMicroOps = 1; 712 let ReleaseAtCycles = [1]; 713} 714def: InstRW<[SKXWriteResGroup10], (instrs SGDT64m, 715 SIDT64m, 716 SMSW16m, 717 STRm, 718 SYSCALL)>; 719 720def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> { 721 let Latency = 1; 722 let NumMicroOps = 2; 723 let ReleaseAtCycles = [1,1]; 724} 725def: InstRW<[SKXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>; 726def: InstRW<[SKXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk", 727 "ST_FP(32|64|80)m")>; 728 729def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> { 730 let Latency = 2; 731 let NumMicroOps = 2; 732 let ReleaseAtCycles = [2]; 733} 734def: InstRW<[SKXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>; 735 736def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> { 737 let Latency = 2; 738 let NumMicroOps = 2; 739 let ReleaseAtCycles = [2]; 740} 741def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP, 742 MMX_MOVDQ2Qrr)>; 743 744def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> { 745 let Latency = 2; 746 let NumMicroOps = 2; 747 let ReleaseAtCycles = [2]; 748} 749def: InstRW<[SKXWriteResGroup17], (instrs LFENCE, 750 WAIT, 751 XGETBV)>; 752 753def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> { 754 let Latency = 2; 755 let NumMicroOps = 2; 756 let ReleaseAtCycles = [1,1]; 757} 758def: InstRW<[SKXWriteResGroup20], (instregex "CLFLUSH")>; 759 760def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> { 761 let Latency = 2; 762 let NumMicroOps = 2; 763 let ReleaseAtCycles = [1,1]; 
764} 765def: InstRW<[SKXWriteResGroup23], (instrs CWD, 766 JCXZ, JECXZ, JRCXZ, 767 ADC8i8, SBB8i8, 768 ADC16i16, SBB16i16, 769 ADC32i32, SBB32i32, 770 ADC64i32, SBB64i32)>; 771 772def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> { 773 let Latency = 2; 774 let NumMicroOps = 3; 775 let ReleaseAtCycles = [1,1,1]; 776} 777def: InstRW<[SKXWriteResGroup25], (instrs FNSTCW16m)>; 778 779def SKXWriteResGroup27 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> { 780 let Latency = 2; 781 let NumMicroOps = 3; 782 let ReleaseAtCycles = [1,1,1]; 783} 784def: InstRW<[SKXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>; 785 786def SKXWriteResGroup28 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> { 787 let Latency = 2; 788 let NumMicroOps = 3; 789 let ReleaseAtCycles = [1,1,1]; 790} 791def: InstRW<[SKXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8, 792 STOSB, STOSL, STOSQ, STOSW)>; 793def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>; 794 795def SKXWriteResGroup29 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> { 796 let Latency = 2; 797 let NumMicroOps = 5; 798 let ReleaseAtCycles = [2,2,1]; 799} 800def: InstRW<[SKXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>; 801 802def SKXWriteResGroup30 : SchedWriteRes<[SKXPort0]> { 803 let Latency = 3; 804 let NumMicroOps = 1; 805 let ReleaseAtCycles = [1]; 806} 807def: InstRW<[SKXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk", 808 "KORTEST(B|D|Q|W)kk", 809 "KTEST(B|D|Q|W)kk")>; 810 811def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> { 812 let Latency = 3; 813 let NumMicroOps = 1; 814 let ReleaseAtCycles = [1]; 815} 816def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr", 817 "PEXT(32|64)rr")>; 818 819def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> { 820 let Latency = 3; 821 let NumMicroOps = 1; 822 let ReleaseAtCycles = [1]; 823} 824def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)", 825 "VALIGND(Z|Z128|Z256)rri", 826 
"VALIGNQ(Z|Z128|Z256)rri", 827 "VPBROADCAST(B|W)rr", 828 "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>; 829 830def SKXWriteResGroup33 : SchedWriteRes<[SKXPort5]> { 831 let Latency = 4; 832 let NumMicroOps = 1; 833 let ReleaseAtCycles = [1]; 834} 835def: InstRW<[SKXWriteResGroup33], (instregex "KADD(B|D|Q|W)kk", 836 "KSHIFTL(B|D|Q|W)ki", 837 "KSHIFTR(B|D|Q|W)ki", 838 "KUNPCK(BW|DQ|WD)kk", 839 "VCMPPD(Z|Z128|Z256)rri", 840 "VCMPPS(Z|Z128|Z256)rri", 841 "VCMP(SD|SS)Zrr", 842 "VFPCLASS(PD|PS)(Z|Z128|Z256)ri", 843 "VFPCLASS(SD|SS)Zri", 844 "VPCMPB(Z|Z128|Z256)rri", 845 "VPCMPD(Z|Z128|Z256)rri", 846 "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr", 847 "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr", 848 "VPCMPQ(Z|Z128|Z256)rri", 849 "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri", 850 "VPCMPW(Z|Z128|Z256)rri", 851 "VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>; 852 853def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> { 854 let Latency = 3; 855 let NumMicroOps = 2; 856 let ReleaseAtCycles = [1,1]; 857} 858def: InstRW<[SKXWriteResGroup34], (instrs FNSTSW16r)>; 859 860def SKXWriteResGroup37 : SchedWriteRes<[SKXPort0,SKXPort5]> { 861 let Latency = 3; 862 let NumMicroOps = 3; 863 let ReleaseAtCycles = [1,2]; 864} 865def: InstRW<[SKXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>; 866 867def SKXWriteResGroup38 : SchedWriteRes<[SKXPort5,SKXPort01]> { 868 let Latency = 3; 869 let NumMicroOps = 3; 870 let ReleaseAtCycles = [2,1]; 871} 872def: InstRW<[SKXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>; 873 874def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5]> { 875 let Latency = 2; 876 let NumMicroOps = 2; 877 let ReleaseAtCycles = [2]; 878} 879def: InstRW<[SKXWriteResGroup41], (instrs MMX_PACKSSDWrr, 880 MMX_PACKSSWBrr, 881 MMX_PACKUSWBrr)>; 882 883def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6,SKXPort0156]> { 884 let Latency = 3; 885 let NumMicroOps = 3; 886 let ReleaseAtCycles = [1,2]; 887} 888def: InstRW<[SKXWriteResGroup42], (instregex "CLD")>; 889 890def SKXWriteResGroup44 : 
SchedWriteRes<[SKXPort06,SKXPort0156]> { 891 let Latency = 2; 892 let NumMicroOps = 3; 893 let ReleaseAtCycles = [1,2]; 894} 895def: InstRW<[SKXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1, 896 RCR8r1, RCR16r1, RCR32r1, RCR64r1)>; 897 898def SKXWriteResGroup44b : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { 899 let Latency = 5; 900 let NumMicroOps = 8; 901 let ReleaseAtCycles = [2,4,2]; 902} 903def: InstRW<[SKXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>; 904 905def SKXWriteResGroup44c : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { 906 let Latency = 6; 907 let NumMicroOps = 8; 908 let ReleaseAtCycles = [2,4,2]; 909} 910def: InstRW<[SKXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>; 911 912def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> { 913 let Latency = 3; 914 let NumMicroOps = 3; 915 let ReleaseAtCycles = [1,1,1]; 916} 917def: InstRW<[SKXWriteResGroup45], (instrs FNSTSWm)>; 918 919def SKXWriteResGroup47 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237,SKXPort0156]> { 920 let Latency = 3; 921 let NumMicroOps = 4; 922 let ReleaseAtCycles = [1,1,1,1]; 923} 924def: InstRW<[SKXWriteResGroup47], (instregex "CALL(16|32|64)r")>; 925 926def SKXWriteResGroup48 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06,SKXPort0156]> { 927 let Latency = 3; 928 let NumMicroOps = 4; 929 let ReleaseAtCycles = [1,1,1,1]; 930} 931def: InstRW<[SKXWriteResGroup48], (instrs CALL64pcrel32)>; 932 933def SKXWriteResGroup49 : SchedWriteRes<[SKXPort0]> { 934 let Latency = 4; 935 let NumMicroOps = 1; 936 let ReleaseAtCycles = [1]; 937} 938def: InstRW<[SKXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>; 939 940def SKXWriteResGroup50 : SchedWriteRes<[SKXPort01]> { 941 let Latency = 4; 942 let NumMicroOps = 1; 943 let ReleaseAtCycles = [1]; 944} 945def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2QQ(Z128|Z256)rr", 946 "VCVTPD2UQQ(Z128|Z256)rr", 947 "VCVTPS2DQ(Y|Z128|Z256)rr", 948 "(V?)CVTPS2DQrr", 949 
"VCVTPS2UDQ(Z128|Z256)rr", 950 "VCVTTPD2QQ(Z128|Z256)rr", 951 "VCVTTPD2UQQ(Z128|Z256)rr", 952 "VCVTTPS2DQ(Z128|Z256)rr", 953 "(V?)CVTTPS2DQrr", 954 "VCVTTPS2UDQ(Z128|Z256)rr")>; 955 956def SKXWriteResGroup50z : SchedWriteRes<[SKXPort05]> { 957 let Latency = 4; 958 let NumMicroOps = 1; 959 let ReleaseAtCycles = [1]; 960} 961def: InstRW<[SKXWriteResGroup50z], (instrs VCVTPD2QQZrr, 962 VCVTPD2UQQZrr, 963 VCVTPS2DQZrr, 964 VCVTPS2UDQZrr, 965 VCVTTPD2QQZrr, 966 VCVTTPD2UQQZrr, 967 VCVTTPS2DQZrr, 968 VCVTTPS2UDQZrr)>; 969 970def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> { 971 let Latency = 4; 972 let NumMicroOps = 2; 973 let ReleaseAtCycles = [2]; 974} 975def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr", 976 "VEXPANDPS(Z|Z128|Z256)rr", 977 "VPEXPANDD(Z|Z128|Z256)rr", 978 "VPEXPANDQ(Z|Z128|Z256)rr", 979 "VPMOVDB(Z|Z128|Z256)rr", 980 "VPMOVDW(Z|Z128|Z256)rr", 981 "VPMOVQB(Z|Z128|Z256)rr", 982 "VPMOVQW(Z|Z128|Z256)rr", 983 "VPMOVSDB(Z|Z128|Z256)rr", 984 "VPMOVSDW(Z|Z128|Z256)rr", 985 "VPMOVSQB(Z|Z128|Z256)rr", 986 "VPMOVSQD(Z|Z128|Z256)rr", 987 "VPMOVSQW(Z|Z128|Z256)rr", 988 "VPMOVSWB(Z|Z128|Z256)rr", 989 "VPMOVUSDB(Z|Z128|Z256)rr", 990 "VPMOVUSDW(Z|Z128|Z256)rr", 991 "VPMOVUSQB(Z|Z128|Z256)rr", 992 "VPMOVUSQD(Z|Z128|Z256)rr", 993 "VPMOVUSWB(Z|Z128|Z256)rr", 994 "VPMOVWB(Z|Z128|Z256)rr")>; 995 996def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { 997 let Latency = 4; 998 let NumMicroOps = 3; 999 let ReleaseAtCycles = [1,1,1]; 1000} 1001def: InstRW<[SKXWriteResGroup54], (instregex "IST(T?)_FP(16|32|64)m", 1002 "IST_F(16|32)m", 1003 "VPMOVQD(Z|Z128|Z256)mr(b?)")>; 1004 1005def SKXWriteResGroup55 : SchedWriteRes<[SKXPort0156]> { 1006 let Latency = 4; 1007 let NumMicroOps = 4; 1008 let ReleaseAtCycles = [4]; 1009} 1010def: InstRW<[SKXWriteResGroup55], (instrs FNCLEX)>; 1011 1012def SKXWriteResGroup56 : SchedWriteRes<[]> { 1013 let Latency = 0; 1014 let NumMicroOps = 4; 1015 let ReleaseAtCycles = []; 1016} 1017def: 
InstRW<[SKXWriteResGroup56], (instrs VZEROUPPER)>; 1018 1019def SKXWriteResGroup57 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort0156]> { 1020 let Latency = 4; 1021 let NumMicroOps = 4; 1022 let ReleaseAtCycles = [1,1,2]; 1023} 1024def: InstRW<[SKXWriteResGroup57], (instregex "LAR(16|32|64)rr")>; 1025 1026def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort01]> { 1027 let Latency = 5; 1028 let NumMicroOps = 2; 1029 let ReleaseAtCycles = [1,1]; 1030} 1031def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr", 1032 "MMX_CVT(T?)PS2PIrr", 1033 "VCVTDQ2PDZ128rr", 1034 "VCVTPD2DQZ128rr", 1035 "(V?)CVT(T?)PD2DQrr", 1036 "VCVTPD2UDQZ128rr", 1037 "VCVTPS2PDZ128rr", 1038 "(V?)CVTPS2PDrr", 1039 "VCVTPS2QQZ128rr", 1040 "VCVTPS2UQQZ128rr", 1041 "VCVTQQ2PSZ128rr", 1042 "(V?)CVTSI(64)?2SDrr", 1043 "VCVTSI2SSZrr", 1044 "(V?)CVTSI2SSrr", 1045 "VCVTSI(64)?2SDZrr", 1046 "VCVTSS2SDZrr", 1047 "(V?)CVTSS2SDrr", 1048 "VCVTTPD2DQZ128rr", 1049 "VCVTTPD2UDQZ128rr", 1050 "VCVTTPS2QQZ128rr", 1051 "VCVTTPS2UQQZ128rr", 1052 "VCVTUDQ2PDZ128rr", 1053 "VCVTUQQ2PSZ128rr", 1054 "VCVTUSI2SSZrr", 1055 "VCVTUSI(64)?2SDZrr")>; 1056 1057def SKXWriteResGroup62 : SchedWriteRes<[SKXPort5,SKXPort015]> { 1058 let Latency = 5; 1059 let NumMicroOps = 3; 1060 let ReleaseAtCycles = [2,1]; 1061} 1062def: InstRW<[SKXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>; 1063 1064def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> { 1065 let Latency = 5; 1066 let NumMicroOps = 3; 1067 let ReleaseAtCycles = [1,1,1]; 1068} 1069def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>; 1070 1071def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort01]> { 1072 let Latency = 5; 1073 let NumMicroOps = 3; 1074 let ReleaseAtCycles = [1,1,1]; 1075} 1076def: InstRW<[SKXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)", 1077 "VCVTPS2PHZ256mr(b?)", 1078 "VCVTPS2PHZmr(b?)")>; 1079 1080def SKXWriteResGroup66 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { 1081 let Latency = 5; 1082 
let NumMicroOps = 4; 1083 let ReleaseAtCycles = [1,2,1]; 1084} 1085def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDB(Z|Z128|Z256)mr(b?)", 1086 "VPMOVDW(Z|Z128|Z256)mr(b?)", 1087 "VPMOVQB(Z|Z128|Z256)mr(b?)", 1088 "VPMOVQW(Z|Z128|Z256)mr(b?)", 1089 "VPMOVSDB(Z|Z128|Z256)mr(b?)", 1090 "VPMOVSDW(Z|Z128|Z256)mr(b?)", 1091 "VPMOVSQB(Z|Z128|Z256)mr(b?)", 1092 "VPMOVSQD(Z|Z128|Z256)mr(b?)", 1093 "VPMOVSQW(Z|Z128|Z256)mr(b?)", 1094 "VPMOVSWB(Z|Z128|Z256)mr(b?)", 1095 "VPMOVUSDB(Z|Z128|Z256)mr(b?)", 1096 "VPMOVUSDW(Z|Z128|Z256)mr(b?)", 1097 "VPMOVUSQB(Z|Z128|Z256)mr(b?)", 1098 "VPMOVUSQD(Z|Z128|Z256)mr(b?)", 1099 "VPMOVUSQW(Z|Z128|Z256)mr(b?)", 1100 "VPMOVUSWB(Z|Z128|Z256)mr(b?)", 1101 "VPMOVWB(Z|Z128|Z256)mr(b?)")>; 1102 1103def SKXWriteResGroup67 : SchedWriteRes<[SKXPort06,SKXPort0156]> { 1104 let Latency = 5; 1105 let NumMicroOps = 5; 1106 let ReleaseAtCycles = [1,4]; 1107} 1108def: InstRW<[SKXWriteResGroup67], (instrs XSETBV)>; 1109 1110def SKXWriteResGroup69 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> { 1111 let Latency = 5; 1112 let NumMicroOps = 6; 1113 let ReleaseAtCycles = [1,1,4]; 1114} 1115def: InstRW<[SKXWriteResGroup69], (instregex "PUSHF(16|64)")>; 1116 1117def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> { 1118 let Latency = 6; 1119 let NumMicroOps = 1; 1120 let ReleaseAtCycles = [1]; 1121} 1122def: InstRW<[SKXWriteResGroup71], (instrs VBROADCASTSSrm, 1123 VPBROADCASTDrm, 1124 VPBROADCASTQrm)>; 1125def: InstRW<[SKXWriteResGroup71], (instregex "(V?)MOVSHDUPrm", 1126 "(V?)MOVSLDUPrm", 1127 "(V?)MOVDDUPrm")>; 1128 1129def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> { 1130 let Latency = 6; 1131 let NumMicroOps = 2; 1132 let ReleaseAtCycles = [2]; 1133} 1134def: InstRW<[SKXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>; 1135def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr", 1136 "VCOMPRESSPS(Z|Z128|Z256)rr", 1137 "VPCOMPRESSD(Z|Z128|Z256)rr", 1138 "VPCOMPRESSQ(Z|Z128|Z256)rr", 1139 "VPERMW(Z|Z128|Z256)rr")>; 1140 1141def 
SKXWriteResGroup73 : SchedWriteRes<[SKXPort0,SKXPort23]> { 1142 let Latency = 6; 1143 let NumMicroOps = 2; 1144 let ReleaseAtCycles = [1,1]; 1145} 1146def: InstRW<[SKXWriteResGroup73], (instrs MMX_PADDSBrm, 1147 MMX_PADDSWrm, 1148 MMX_PADDUSBrm, 1149 MMX_PADDUSWrm, 1150 MMX_PAVGBrm, 1151 MMX_PAVGWrm, 1152 MMX_PCMPEQBrm, 1153 MMX_PCMPEQDrm, 1154 MMX_PCMPEQWrm, 1155 MMX_PCMPGTBrm, 1156 MMX_PCMPGTDrm, 1157 MMX_PCMPGTWrm, 1158 MMX_PMAXSWrm, 1159 MMX_PMAXUBrm, 1160 MMX_PMINSWrm, 1161 MMX_PMINUBrm, 1162 MMX_PSUBSBrm, 1163 MMX_PSUBSWrm, 1164 MMX_PSUBUSBrm, 1165 MMX_PSUBUSWrm)>; 1166 1167def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> { 1168 let Latency = 6; 1169 let NumMicroOps = 2; 1170 let ReleaseAtCycles = [1,1]; 1171} 1172def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64m)>; 1173def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>; 1174 1175def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> { 1176 let Latency = 6; 1177 let NumMicroOps = 2; 1178 let ReleaseAtCycles = [1,1]; 1179} 1180def: InstRW<[SKXWriteResGroup79], (instregex "ANDN(32|64)rm", 1181 "MOVBE(16|32|64)rm")>; 1182 1183def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> { 1184 let Latency = 6; 1185 let NumMicroOps = 2; 1186 let ReleaseAtCycles = [1,1]; 1187} 1188def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>; 1189def: InstRW<[SKXWriteResGroup80], (instrs VMOVDI2PDIZrm)>; 1190 1191def SKXWriteResGroup81 : SchedWriteRes<[SKXPort23,SKXPort0156]> { 1192 let Latency = 6; 1193 let NumMicroOps = 2; 1194 let ReleaseAtCycles = [1,1]; 1195} 1196def: InstRW<[SKXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>; 1197def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)rmr")>; 1198 1199def SKXWriteResGroup82 : SchedWriteRes<[SKXPort5,SKXPort01]> { 1200 let Latency = 6; 1201 let NumMicroOps = 3; 1202 let ReleaseAtCycles = [2,1]; 1203} 1204def: InstRW<[SKXWriteResGroup82], (instregex "(V?)CVTSI642SSrr", 1205 "VCVTSI642SSZrr", 1206 
"VCVTUSI642SSZrr")>; 1207 1208def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]> { 1209 let Latency = 6; 1210 let NumMicroOps = 4; 1211 let ReleaseAtCycles = [1,1,1,1]; 1212} 1213def: InstRW<[SKXWriteResGroup84], (instregex "SLDT(16|32|64)r")>; 1214 1215def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> { 1216 let Latency = 6; 1217 let NumMicroOps = 4; 1218 let ReleaseAtCycles = [1,1,1,1]; 1219} 1220def: InstRW<[SKXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)", 1221 "SHL(8|16|32|64)m(1|i)", 1222 "SHR(8|16|32|64)m(1|i)")>; 1223 1224def SKXWriteResGroup87 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> { 1225 let Latency = 6; 1226 let NumMicroOps = 4; 1227 let ReleaseAtCycles = [1,1,1,1]; 1228} 1229def: InstRW<[SKXWriteResGroup87], (instregex "POP(16|32|64)rmm", 1230 "PUSH(16|32|64)rmm")>; 1231 1232def SKXWriteResGroup88 : SchedWriteRes<[SKXPort6,SKXPort0156]> { 1233 let Latency = 6; 1234 let NumMicroOps = 6; 1235 let ReleaseAtCycles = [1,5]; 1236} 1237def: InstRW<[SKXWriteResGroup88], (instrs STD)>; 1238 1239def SKXWriteResGroup89 : SchedWriteRes<[SKXPort23]> { 1240 let Latency = 7; 1241 let NumMicroOps = 1; 1242 let ReleaseAtCycles = [1]; 1243} 1244def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m")>; 1245def: InstRW<[SKXWriteResGroup89], (instrs VBROADCASTF128rm, 1246 VBROADCASTI128rm, 1247 VBROADCASTSDYrm, 1248 VBROADCASTSSYrm, 1249 VMOVDDUPYrm, 1250 VMOVSHDUPYrm, 1251 VMOVSLDUPYrm, 1252 VPBROADCASTDYrm, 1253 VPBROADCASTQYrm)>; 1254 1255def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01,SKXPort5]> { 1256 let Latency = 7; 1257 let NumMicroOps = 2; 1258 let ReleaseAtCycles = [1,1]; 1259} 1260def: InstRW<[SKXWriteResGroup90], (instrs VCVTDQ2PDYrr)>; 1261 1262def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1263 let Latency = 7; 1264 let NumMicroOps = 2; 1265 let ReleaseAtCycles = [1,1]; 1266} 1267def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)", 
1268 "VMOVSSZrm(b?)")>; 1269 1270def SKXWriteResGroup92a : SchedWriteRes<[SKXPort5,SKXPort23]> { 1271 let Latency = 6; 1272 let NumMicroOps = 2; 1273 let ReleaseAtCycles = [1,1]; 1274} 1275def: InstRW<[SKXWriteResGroup92a], (instregex "(V?)PMOV(SX|ZX)BDrm", 1276 "(V?)PMOV(SX|ZX)BQrm", 1277 "(V?)PMOV(SX|ZX)BWrm", 1278 "(V?)PMOV(SX|ZX)DQrm", 1279 "(V?)PMOV(SX|ZX)WDrm", 1280 "(V?)PMOV(SX|ZX)WQrm")>; 1281 1282def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort01]> { 1283 let Latency = 7; 1284 let NumMicroOps = 2; 1285 let ReleaseAtCycles = [1,1]; 1286} 1287def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr", 1288 "VCVTPD2DQ(Y|Z256)rr", 1289 "VCVTPD2UDQZ256rr", 1290 "VCVTPS2PD(Y|Z256)rr", 1291 "VCVTPS2QQZ256rr", 1292 "VCVTPS2UQQZ256rr", 1293 "VCVTQQ2PSZ256rr", 1294 "VCVTTPD2DQ(Y|Z256)rr", 1295 "VCVTTPD2UDQZ256rr", 1296 "VCVTTPS2QQZ256rr", 1297 "VCVTTPS2UQQZ256rr", 1298 "VCVTUDQ2PDZ256rr", 1299 "VCVTUQQ2PSZ256rr")>; 1300 1301def SKXWriteResGroup93z : SchedWriteRes<[SKXPort5,SKXPort05]> { 1302 let Latency = 7; 1303 let NumMicroOps = 2; 1304 let ReleaseAtCycles = [1,1]; 1305} 1306def: InstRW<[SKXWriteResGroup93z], (instrs VCVTDQ2PDZrr, 1307 VCVTPD2DQZrr, 1308 VCVTPD2UDQZrr, 1309 VCVTPS2PDZrr, 1310 VCVTPS2QQZrr, 1311 VCVTPS2UQQZrr, 1312 VCVTQQ2PSZrr, 1313 VCVTTPD2DQZrr, 1314 VCVTTPD2UDQZrr, 1315 VCVTTPS2QQZrr, 1316 VCVTTPS2UQQZrr, 1317 VCVTUDQ2PDZrr, 1318 VCVTUQQ2PSZrr)>; 1319 1320def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> { 1321 let Latency = 7; 1322 let NumMicroOps = 2; 1323 let ReleaseAtCycles = [1,1]; 1324} 1325def: InstRW<[SKXWriteResGroup95], (instrs VPBLENDDrmi)>; 1326def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd], 1327 (instregex "VBLENDMPDZ128rm(b?)", 1328 "VBLENDMPSZ128rm(b?)", 1329 "VBROADCASTI32X2Z128rm(b?)", 1330 "VBROADCASTSSZ128rm(b?)", 1331 "VINSERT(F|I)128rmi", 1332 "VMOVAPDZ128rm(b?)", 1333 "VMOVAPSZ128rm(b?)", 1334 "VMOVDDUPZ128rm(b?)", 1335 "VMOVDQA32Z128rm(b?)", 1336 "VMOVDQA64Z128rm(b?)", 1337 
"VMOVDQU16Z128rm(b?)", 1338 "VMOVDQU32Z128rm(b?)", 1339 "VMOVDQU64Z128rm(b?)", 1340 "VMOVDQU8Z128rm(b?)", 1341 "VMOVSHDUPZ128rm(b?)", 1342 "VMOVSLDUPZ128rm(b?)", 1343 "VMOVUPDZ128rm(b?)", 1344 "VMOVUPSZ128rm(b?)", 1345 "VPADD(B|D|Q|W)Z128rm(b?)", 1346 "(V?)PADD(B|D|Q|W)rm", 1347 "VPBLENDM(B|D|Q|W)Z128rm(b?)", 1348 "VPBROADCASTDZ128rm(b?)", 1349 "VPBROADCASTQZ128rm(b?)", 1350 "VPSUB(B|D|Q|W)Z128rm(b?)", 1351 "(V?)PSUB(B|D|Q|W)rm", 1352 "VPTERNLOGDZ128rm(b?)i", 1353 "VPTERNLOGQZ128rm(b?)i")>; 1354 1355def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1356 let Latency = 7; 1357 let NumMicroOps = 3; 1358 let ReleaseAtCycles = [2,1]; 1359} 1360def: InstRW<[SKXWriteResGroup96], (instrs MMX_PACKSSDWrm, 1361 MMX_PACKSSWBrm, 1362 MMX_PACKUSWBrm)>; 1363 1364def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5,SKXPort015]> { 1365 let Latency = 7; 1366 let NumMicroOps = 3; 1367 let ReleaseAtCycles = [2,1]; 1368} 1369def: InstRW<[SKXWriteResGroup97], (instregex "VPERMI2WZ128rr", 1370 "VPERMI2WZ256rr", 1371 "VPERMI2WZrr", 1372 "VPERMT2WZ128rr", 1373 "VPERMT2WZ256rr", 1374 "VPERMT2WZrr")>; 1375 1376def SKXWriteResGroup99 : SchedWriteRes<[SKXPort23,SKXPort0156]> { 1377 let Latency = 7; 1378 let NumMicroOps = 3; 1379 let ReleaseAtCycles = [1,2]; 1380} 1381def: InstRW<[SKXWriteResGroup99], (instrs LEAVE, LEAVE64, 1382 SCASB, SCASL, SCASQ, SCASW)>; 1383 1384def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort01]> { 1385 let Latency = 7; 1386 let NumMicroOps = 3; 1387 let ReleaseAtCycles = [1,1,1]; 1388} 1389def: InstRW<[SKXWriteResGroup100], (instregex "(V?)CVT(T?)SS2SI64(Z?)rr", 1390 "VCVT(T?)SS2USI64Zrr")>; 1391 1392def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> { 1393 let Latency = 7; 1394 let NumMicroOps = 3; 1395 let ReleaseAtCycles = [1,1,1]; 1396} 1397def: InstRW<[SKXWriteResGroup101], (instrs FLDCW16m)>; 1398 1399def SKXWriteResGroup103 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort0156]> { 1400 let Latency = 7; 1401 let 
NumMicroOps = 3; 1402 let ReleaseAtCycles = [1,1,1]; 1403} 1404def: InstRW<[SKXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>; 1405 1406def SKXWriteResGroup104 : SchedWriteRes<[SKXPort6,SKXPort23,SKXPort0156]> { 1407 let Latency = 7; 1408 let NumMicroOps = 3; 1409 let ReleaseAtCycles = [1,1,1]; 1410} 1411def: InstRW<[SKXWriteResGroup104], (instrs LRET64, RET64)>; 1412 1413def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { 1414 let Latency = 7; 1415 let NumMicroOps = 4; 1416 let ReleaseAtCycles = [1,2,1]; 1417} 1418def: InstRW<[SKXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)", 1419 "VCOMPRESSPS(Z|Z128|Z256)mr(b?)", 1420 "VPCOMPRESSD(Z|Z128|Z256)mr(b?)", 1421 "VPCOMPRESSQ(Z|Z128|Z256)mr(b?)")>; 1422 1423def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> { 1424 let Latency = 7; 1425 let NumMicroOps = 5; 1426 let ReleaseAtCycles = [1,1,1,2]; 1427} 1428def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)", 1429 "ROR(8|16|32|64)m(1|i)")>; 1430 1431def SKXWriteResGroup107_1 : SchedWriteRes<[SKXPort06]> { 1432 let Latency = 2; 1433 let NumMicroOps = 2; 1434 let ReleaseAtCycles = [2]; 1435} 1436def: InstRW<[SKXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1, 1437 ROR8r1, ROR16r1, ROR32r1, ROR64r1)>; 1438 1439def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> { 1440 let Latency = 7; 1441 let NumMicroOps = 5; 1442 let ReleaseAtCycles = [1,1,1,2]; 1443} 1444def: InstRW<[SKXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>; 1445 1446def SKXWriteResGroup109 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> { 1447 let Latency = 7; 1448 let NumMicroOps = 5; 1449 let ReleaseAtCycles = [1,1,1,1,1]; 1450} 1451def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m")>; 1452def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64m)>; 1453 1454def SKXWriteResGroup110 : 
SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> { 1455 let Latency = 7; 1456 let NumMicroOps = 7; 1457 let ReleaseAtCycles = [1,2,2,2]; 1458} 1459def: InstRW<[SKXWriteResGroup110], (instrs VPSCATTERDQZ128mr, 1460 VPSCATTERQQZ128mr, 1461 VSCATTERDPDZ128mr, 1462 VSCATTERQPDZ128mr)>; 1463 1464def SKXWriteResGroup111 : SchedWriteRes<[SKXPort6,SKXPort06,SKXPort15,SKXPort0156]> { 1465 let Latency = 7; 1466 let NumMicroOps = 7; 1467 let ReleaseAtCycles = [1,3,1,2]; 1468} 1469def: InstRW<[SKXWriteResGroup111], (instrs LOOP)>; 1470 1471def SKXWriteResGroup112 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> { 1472 let Latency = 7; 1473 let NumMicroOps = 11; 1474 let ReleaseAtCycles = [1,4,4,2]; 1475} 1476def: InstRW<[SKXWriteResGroup112], (instrs VPSCATTERDQZ256mr, 1477 VPSCATTERQQZ256mr, 1478 VSCATTERDPDZ256mr, 1479 VSCATTERQPDZ256mr)>; 1480 1481def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> { 1482 let Latency = 7; 1483 let NumMicroOps = 19; 1484 let ReleaseAtCycles = [1,8,8,2]; 1485} 1486def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr, 1487 VPSCATTERQDZmr, 1488 VPSCATTERQQZmr, 1489 VSCATTERDPDZmr, 1490 VSCATTERQPSZmr, 1491 VSCATTERQPDZmr)>; 1492 1493def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { 1494 let Latency = 7; 1495 let NumMicroOps = 36; 1496 let ReleaseAtCycles = [1,16,1,16,2]; 1497} 1498def: InstRW<[SKXWriteResGroup114], (instrs VSCATTERDPSZmr)>; 1499 1500def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> { 1501 let Latency = 8; 1502 let NumMicroOps = 2; 1503 let ReleaseAtCycles = [1,1]; 1504} 1505def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm", 1506 "PEXT(32|64)rm")>; 1507 1508def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> { 1509 let Latency = 8; 1510 let NumMicroOps = 2; 1511 let ReleaseAtCycles = [1,1]; 1512} 1513def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)(32|64)m", 1514 
"VPBROADCASTB(Z|Z256)rm(b?)", 1515 "VPBROADCASTW(Z|Z256)rm(b?)")>; 1516def: InstRW<[SKXWriteResGroup119], (instrs VPBROADCASTBYrm, 1517 VPBROADCASTWYrm, 1518 VPMOVSXBDYrm, 1519 VPMOVSXBQYrm, 1520 VPMOVSXWQYrm)>; 1521 1522def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> { 1523 let Latency = 8; 1524 let NumMicroOps = 2; 1525 let ReleaseAtCycles = [1,1]; 1526} 1527def: InstRW<[SKXWriteResGroup121], (instrs VPBLENDDYrmi)>; 1528def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd], 1529 (instregex "VBLENDMPD(Z|Z256)rm(b?)", 1530 "VBLENDMPS(Z|Z256)rm(b?)", 1531 "VBROADCASTF32X2Z256rm(b?)", 1532 "VBROADCASTF32X2Zrm(b?)", 1533 "VBROADCASTF32X4Z256rm(b?)", 1534 "VBROADCASTF32X4Zrm(b?)", 1535 "VBROADCASTF32X8Zrm(b?)", 1536 "VBROADCASTF64X2Z256rm(b?)", 1537 "VBROADCASTF64X2Zrm(b?)", 1538 "VBROADCASTF64X4Zrm(b?)", 1539 "VBROADCASTI32X2Z256rm(b?)", 1540 "VBROADCASTI32X2Zrm(b?)", 1541 "VBROADCASTI32X4Z256rm(b?)", 1542 "VBROADCASTI32X4Zrm(b?)", 1543 "VBROADCASTI32X8Zrm(b?)", 1544 "VBROADCASTI64X2Z256rm(b?)", 1545 "VBROADCASTI64X2Zrm(b?)", 1546 "VBROADCASTI64X4Zrm(b?)", 1547 "VBROADCASTSD(Z|Z256)rm(b?)", 1548 "VBROADCASTSS(Z|Z256)rm(b?)", 1549 "VINSERTF32X4(Z|Z256)rm(b?)", 1550 "VINSERTF32X8Zrm(b?)", 1551 "VINSERTF64X2(Z|Z256)rm(b?)", 1552 "VINSERTF64X4Zrm(b?)", 1553 "VINSERTI32X4(Z|Z256)rm(b?)", 1554 "VINSERTI32X8Zrm(b?)", 1555 "VINSERTI64X2(Z|Z256)rm(b?)", 1556 "VINSERTI64X4Zrm(b?)", 1557 "VMOVAPD(Z|Z256)rm(b?)", 1558 "VMOVAPS(Z|Z256)rm(b?)", 1559 "VMOVDDUP(Z|Z256)rm(b?)", 1560 "VMOVDQA32(Z|Z256)rm(b?)", 1561 "VMOVDQA64(Z|Z256)rm(b?)", 1562 "VMOVDQU16(Z|Z256)rm(b?)", 1563 "VMOVDQU32(Z|Z256)rm(b?)", 1564 "VMOVDQU64(Z|Z256)rm(b?)", 1565 "VMOVDQU8(Z|Z256)rm(b?)", 1566 "VMOVSHDUP(Z|Z256)rm(b?)", 1567 "VMOVSLDUP(Z|Z256)rm(b?)", 1568 "VMOVUPD(Z|Z256)rm(b?)", 1569 "VMOVUPS(Z|Z256)rm(b?)", 1570 "VPADD(B|D|Q|W)Yrm", 1571 "VPADD(B|D|Q|W)(Z|Z256)rm(b?)", 1572 "VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)", 1573 "VPBROADCASTD(Z|Z256)rm(b?)", 1574 "VPBROADCASTQ(Z|Z256)rm(b?)", 1575 
"VPSUB(B|D|Q|W)Yrm", 1576 "VPSUB(B|D|Q|W)(Z|Z256)rm(b?)", 1577 "VPTERNLOGD(Z|Z256)rm(b?)i", 1578 "VPTERNLOGQ(Z|Z256)rm(b?)i")>; 1579 1580def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { 1581 let Latency = 8; 1582 let NumMicroOps = 4; 1583 let ReleaseAtCycles = [1,2,1]; 1584} 1585def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>; 1586 1587def SKXWriteResGroup127 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { 1588 let Latency = 8; 1589 let NumMicroOps = 5; 1590 let ReleaseAtCycles = [1,1,1,2]; 1591} 1592def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)", 1593 "RCR(8|16|32|64)m(1|i)")>; 1594 1595def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> { 1596 let Latency = 8; 1597 let NumMicroOps = 6; 1598 let ReleaseAtCycles = [1,1,1,3]; 1599} 1600def: InstRW<[SKXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL", 1601 "ROR(8|16|32|64)mCL", 1602 "SAR(8|16|32|64)mCL", 1603 "SHL(8|16|32|64)mCL", 1604 "SHR(8|16|32|64)mCL")>; 1605 1606def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { 1607 let Latency = 8; 1608 let NumMicroOps = 6; 1609 let ReleaseAtCycles = [1,1,1,2,1]; 1610} 1611def: SchedAlias<WriteADCRMW, SKXWriteResGroup130>; 1612 1613def SKXWriteResGroup131 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { 1614 let Latency = 8; 1615 let NumMicroOps = 8; 1616 let ReleaseAtCycles = [1,2,1,2,2]; 1617} 1618def: InstRW<[SKXWriteResGroup131], (instrs VPSCATTERQDZ128mr, 1619 VPSCATTERQDZ256mr, 1620 VSCATTERQPSZ128mr, 1621 VSCATTERQPSZ256mr)>; 1622 1623def SKXWriteResGroup132 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { 1624 let Latency = 8; 1625 let NumMicroOps = 12; 1626 let ReleaseAtCycles = [1,4,1,4,2]; 1627} 1628def: InstRW<[SKXWriteResGroup132], (instrs VPSCATTERDDZ128mr, 1629 VSCATTERDPSZ128mr)>; 1630 1631def SKXWriteResGroup133 : 
SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
  let Latency = 8;
  let NumMicroOps = 20;
  let ReleaseAtCycles = [1,8,1,8,2];
}
def: InstRW<[SKXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
                                           VSCATTERDPSZ256mr)>;

def SKXWriteResGroup134 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
  let Latency = 8;
  let NumMicroOps = 36;
  let ReleaseAtCycles = [1,16,1,16,2];
}
def: InstRW<[SKXWriteResGroup134], (instrs VPSCATTERDDZmr)>;

// Latency-9 groups.
def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>;

def SKXWriteResGroup136 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm,
                                           VPMOVSXDQYrm,
                                           VPMOVSXWDYrm,
                                           VPMOVZXWDYrm)>;
def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
                                              "VFPCLASSSDZm(b?)i",
                                              "VFPCLASSSSZm(b?)i",
                                              "(V?)PCMPGTQrm",
                                              "VPERMI2DZ128rm(b?)",
                                              "VPERMI2PDZ128rm(b?)",
                                              "VPERMI2PSZ128rm(b?)",
                                              "VPERMI2QZ128rm(b?)",
                                              "VPERMT2DZ128rm(b?)",
                                              "VPERMT2PDZ128rm(b?)",
                                              "VPERMT2PSZ128rm(b?)",
                                              "VPERMT2QZ128rm(b?)",
                                              "VPMAXSQZ128rm(b?)",
                                              "VPMAXUQZ128rm(b?)",
                                              "VPMINSQZ128rm(b?)",
                                              "VPMINUQZ128rm(b?)")>;

// Same ports and uop count as SKXWriteResGroup136, with Latency 10 instead of 9.
def SKXWriteResGroup136_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
                                                "VCMP(SD|SS)Zrm",
                                                "VFPCLASSPDZ128m(b?)i",
                                                "VFPCLASSPSZ128m(b?)i",
                                                "VPCMPBZ128rm(b?)i",
                                                "VPCMPDZ128rm(b?)i",
                                                "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
                                                "VPCMPGT(B|D|Q|W)Z128rm(b?)",
                                                "VPCMPQZ128rm(b?)i",
                                                "VPCMPU(B|D|Q|W)Z128rm(b?)i",
                                                "VPCMPWZ128rm(b?)i",
                                                "VPTESTMBZ128rm(b?)",
                                                "VPTESTMDZ128rm(b?)",
                                                "VPTESTMQZ128rm(b?)",
                                                "VPTESTMWZ128rm(b?)",
                                                "VPTESTNMBZ128rm(b?)",
                                                "VPTESTNMDZ128rm(b?)",
                                                "VPTESTNMQZ128rm(b?)",
                                                "VPTESTNMWZ128rm(b?)")>;

def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort01]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
                                              "(V?)CVTPS2PDrm")>;

def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
  let Latency = 9;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm",
                                              "(V?)PHSUBSWrm")>;

def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
  let Latency = 9;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteResGroup146], (instregex "LAR(16|32|64)rm",
                                              "LSL(16|32|64)rm")>;

// Latency-10 groups.
def SKXWriteResGroup148 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup148], (instrs VPCMPGTQYrm)>;
def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
                                              "ILD_F(16|32|64)m",
                                              "VALIGND(Z|Z256)rm(b?)i",
                                              "VALIGNQ(Z|Z256)rm(b?)i",
                                              "VPMAXSQ(Z|Z256)rm(b?)",
                                              "VPMAXUQ(Z|Z256)rm(b?)",
                                              "VPMINSQ(Z|Z256)rm(b?)",
                                              "VPMINUQ(Z|Z256)rm(b?)")>;

// Same ports and uop count as SKXWriteResGroup148, with Latency 11 instead of 10.
def SKXWriteResGroup148_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
                                                "VCMPPS(Z|Z256)rm(b?)i",
                                                "VFPCLASSPD(Z|Z256)m(b?)i",
                                                "VFPCLASSPS(Z|Z256)m(b?)i",
                                                "VPCMPB(Z|Z256)rm(b?)i",
                                                "VPCMPD(Z|Z256)rm(b?)i",
                                                "VPCMPEQB(Z|Z256)rm(b?)",
                                                "VPCMPEQD(Z|Z256)rm(b?)",
                                                "VPCMPEQQ(Z|Z256)rm(b?)",
                                                "VPCMPEQW(Z|Z256)rm(b?)",
                                                "VPCMPGTB(Z|Z256)rm(b?)",
                                                "VPCMPGTD(Z|Z256)rm(b?)",
                                                "VPCMPGTQ(Z|Z256)rm(b?)",
                                                "VPCMPGTW(Z|Z256)rm(b?)",
                                                "VPCMPQ(Z|Z256)rm(b?)i",
                                                "VPCMPU(B|D|Q|W)Z256rm(b?)i",
                                                "VPCMPU(B|D|Q|W)Zrm(b?)i",
                                                "VPCMPW(Z|Z256)rm(b?)i",
                                                "VPTESTM(B|D|Q|W)Z256rm(b?)",
                                                "VPTESTM(B|D|Q|W)Zrm(b?)",
                                                "VPTESTNM(B|D|Q|W)Z256rm(b?)",
                                                "VPTESTNM(B|D|Q|W)Zrm(b?)")>;

def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort01]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)",
                                              "VCVTDQ2PSZ128rm(b?)",
                                              "(V?)CVTDQ2PSrm",
                                              "VCVTPD2QQZ128rm(b?)",
                                              "VCVTPD2UQQZ128rm(b?)",
                                              "VCVTPH2PSZ128rm(b?)",
                                              "VCVTPS2DQZ128rm(b?)",
                                              "(V?)CVTPS2DQrm",
                                              "VCVTPS2PDZ128rm(b?)",
                                              "VCVTPS2QQZ128rm(b?)",
                                              "VCVTPS2UDQZ128rm(b?)",
                                              "VCVTPS2UQQZ128rm(b?)",
                                              "VCVTQQ2PDZ128rm(b?)",
                                              "VCVTQQ2PSZ128rm(b?)",
                                              "VCVTSS2SDZrm",
                                              "(V?)CVTSS2SDrm",
                                              "VCVTTPD2QQZ128rm(b?)",
                                              "VCVTTPD2UQQZ128rm(b?)",
                                              "VCVTTPS2DQZ128rm(b?)",
                                              "(V?)CVTTPS2DQrm",
                                              "VCVTTPS2QQZ128rm(b?)",
                                              "VCVTTPS2UDQZ128rm(b?)",
                                              "VCVTTPS2UQQZ128rm(b?)",
                                              "VCVTUDQ2PDZ128rm(b?)",
                                              "VCVTUDQ2PSZ128rm(b?)",
                                              "VCVTUQQ2PDZ128rm(b?)",
                                              "VCVTUQQ2PSZ128rm(b?)")>;

def SKXWriteResGroup151 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 10;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
                                              "VEXPANDPSZ128rm(b?)",
                                              "VPEXPANDDZ128rm(b?)",
                                              "VPEXPANDQZ128rm(b?)")>;

def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
  let Latency = 10;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup154],
(instrs VPHADDSWYrm,
        VPHSUBSWYrm)>;

def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 10;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [1,1,1,1,1,3];
}
def: InstRW<[SKXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>;

// Latency-11 groups.
def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F(32|64)m")>;

def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort01]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup161], (instrs VCVTDQ2PSYrm,
                                           VCVTPS2PDYrm)>;
def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)",
                                              "VCVTPH2PS(Z|Z256)rm(b?)",
                                              "VCVTPS2PD(Z|Z256)rm(b?)",
                                              "VCVTQQ2PD(Z|Z256)rm(b?)",
                                              "VCVTQQ2PSZ256rm(b?)",
                                              "VCVT(T?)PD2QQ(Z|Z256)rm(b?)",
                                              "VCVT(T?)PD2UQQ(Z|Z256)rm(b?)",
                                              "VCVT(T?)PS2DQYrm",
                                              "VCVT(T?)PS2DQ(Z|Z256)rm(b?)",
                                              "VCVT(T?)PS2QQZ256rm(b?)",
                                              "VCVT(T?)PS2UDQ(Z|Z256)rm(b?)",
                                              "VCVT(T?)PS2UQQZ256rm(b?)",
                                              "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)",
                                              "VCVTUQQ2PD(Z|Z256)rm(b?)",
                                              "VCVTUQQ2PSZ256rm(b?)")>;

def SKXWriteResGroup162 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup162], (instregex "FICOM(P?)(16|32)m",
                                              "VEXPANDPD(Z|Z256)rm(b?)",
                                              "VEXPANDPS(Z|Z256)rm(b?)",
                                              "VPEXPANDD(Z|Z256)rm(b?)",
                                              "VPEXPANDQ(Z|Z256)rm(b?)")>;

def SKXWriteResGroup164 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>;

def SKXWriteResGroup166 :
SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2DQrm,
                                           CVTTPD2DQrm,
                                           MMX_CVTPD2PIrm,
                                           MMX_CVTTPD2PIrm)>;

def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>;

def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
  let Latency = 11;
  let NumMicroOps = 7;
  let ReleaseAtCycles = [2,3,2];
}
def: InstRW<[SKXWriteResGroup169], (instregex "RCL(16|32|64)rCL",
                                              "RCR(16|32|64)rCL")>;

def SKXWriteResGroup170 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 11;
  let NumMicroOps = 9;
  let ReleaseAtCycles = [1,5,1,2];
}
def: InstRW<[SKXWriteResGroup170], (instrs RCL8rCL)>;

def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
  let Latency = 11;
  let NumMicroOps = 11;
  let ReleaseAtCycles = [2,9];
}
def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>;

// NOTE(review): Group174/174z carry Latency 15 although they sit between the
// latency-11 and latency-12 groups; the surrounding groups are otherwise in
// ascending latency order. Presumably intentional -- confirm.
def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> {
  let Latency = 15;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;

// Same latency and uop count as SKXWriteResGroup174, but on SKXPort05 instead
// of SKXPort01; bound to the VPMULLQZrr pattern only.
def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> {
  let Latency = 15;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def: InstRW<[SKXWriteResGroup174z], (instregex "VPMULLQZrr")>;

// Latency-12 groups.
def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>;

def SKXWriteResGroup176 :
SchedWriteRes<[SKXPort0,SKXPort23,SKXPort01]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)",
                                              "VCVT(T?)SS2USI64Zrm(b?)")>;

def SKXWriteResGroup177 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
                                              "VCVT(T?)PS2UQQZrm(b?)")>;

// Latency-13 groups.
def SKXWriteResGroup180 : SchedWriteRes<[SKXPort5,SKXPort23]> {
  let Latency = 13;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
                                              "VPERMWZ256rm(b?)",
                                              "VPERMWZrm(b?)")>;

def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
  let Latency = 13;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup181], (instrs VCVTDQ2PDYrm)>;

def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
  let Latency = 13;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2WZ128rm(b?)",
                                              "VPERMT2WZ128rm(b?)")>;

// Latency-14 groups.
def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
  let Latency = 14;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup187], (instregex "MUL_FI(16|32)m")>;

def SKXWriteResGroup188 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort01]> {
  let Latency = 14;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)",
                                              "VCVTPD2UDQZrm(b?)",
                                              "VCVTQQ2PSZrm(b?)",
                                              "VCVTTPD2DQZrm(b?)",
                                              "VCVTTPD2UDQZrm(b?)",
                                              "VCVTUQQ2PSZrm(b?)")>;

def SKXWriteResGroup189 :
SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
  let Latency = 14;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2WZ256rm(b?)",
                                              "VPERMI2WZrm(b?)",
                                              "VPERMT2WZ256rm(b?)",
                                              "VPERMT2WZrm(b?)")>;

def SKXWriteResGroup190 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 14;
  let NumMicroOps = 10;
  let ReleaseAtCycles = [2,4,1,3];
}
def: InstRW<[SKXWriteResGroup190], (instrs RCR8rCL)>;

// Latency-15 groups.
def SKXWriteResGroup191 : SchedWriteRes<[SKXPort0]> {
  let Latency = 15;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>;

def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
  let Latency = 15;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [1,2,2,1,2];
}
def: InstRW<[SKXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>;

def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 15;
  let NumMicroOps = 10;
  let ReleaseAtCycles = [1,1,1,5,1,1];
}
def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;

def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 16;
  let NumMicroOps = 14;
  let ReleaseAtCycles = [1,1,1,4,2,5];
}
def: InstRW<[SKXWriteResGroup199], (instrs CMPXCHG8B)>;

// NOTE(review): NumMicroOps (34) is much larger than the sum of
// ReleaseAtCycles (1+4+5 = 10), and Latency 12 breaks the ascending order of
// the neighbouring groups. Values kept as-is -- confirm they are intended.
def SKXWriteResGroup200 : SchedWriteRes<[SKXPort1, SKXPort05, SKXPort6]> {
  let Latency = 12;
  let NumMicroOps = 34;
  let ReleaseAtCycles = [1, 4, 5];
}
def: InstRW<[SKXWriteResGroup200], (instrs VZEROALL)>;

def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
  let Latency = 17;
  let NumMicroOps =
15;
  let ReleaseAtCycles = [2,1,2,4,2,4];
}
def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>;

def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort01]> {
  let Latency = 21;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>;

def SKXWriteResGroup207 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> {
  let Latency = 18;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [1,1,1,5];
}
def: InstRW<[SKXWriteResGroup207], (instrs CPUID, RDTSC)>;

def SKXWriteResGroup208 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 18;
  let NumMicroOps = 11;
  let ReleaseAtCycles = [2,1,1,4,1,2];
}
def: InstRW<[SKXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>;

def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort01]> {
  let Latency = 22;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>;

// Same latency and uop count as SKXWriteResGroup211, but the non-load uops use
// SKXPort05 instead of SKXPort01; bound to the VPMULLQZrm(b?) pattern only.
def SKXWriteResGroup211_1 : SchedWriteRes<[SKXPort23,SKXPort05]> {
  let Latency = 22;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>;

def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> {
  let Latency = 20;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}
def: InstRW<[SKXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;

// Gather write whose SKXPort23 entry in ReleaseAtCycles is 2, matching the
// numeric suffix of the def name.
def SKXWriteGatherEVEX2 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
  let Latency = 17;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
                                           VGATHERDPDZ128rm, VPGATHERDQZ128rm,
                                           VGATHERQPDZ128rm, VPGATHERQQZ128rm)>;

def
SKXWriteGatherEVEX4 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
  let Latency = 19;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,4,1,1];
}
def: InstRW<[SKXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
                                           VGATHERQPDZ256rm, VPGATHERQQZ256rm,
                                           VGATHERDPSZ128rm, VPGATHERDDZ128rm,
                                           VGATHERDPDZ256rm, VPGATHERDQZ256rm)>;

// In the EVEX4/EVEX8/EVEX16 gather writes the SKXPort23 entry of
// ReleaseAtCycles equals the numeric suffix of the def name (4, 8, 16), while
// NumMicroOps stays at 5.
def SKXWriteGatherEVEX8 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
  let Latency = 21;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,8,1,1];
}
def: InstRW<[SKXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
                                           VGATHERDPDZrm,    VPGATHERDQZrm,
                                           VGATHERQPDZrm,    VPGATHERQQZrm,
                                           VGATHERQPSZrm,    VPGATHERQDZrm)>;

def SKXWriteGatherEVEX16 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
  let Latency = 25;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,16,1,1];
}
def: InstRW<[SKXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;

def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 20;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [1,1,1,1,1,1,2];
}
def: InstRW<[SKXWriteResGroup219], (instrs INSB, INSL, INSW)>;

def SKXWriteResGroup220 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort0156]> {
  let Latency = 20;
  let NumMicroOps = 10;
  let ReleaseAtCycles = [1,2,7];
}
def: InstRW<[SKXWriteResGroup220], (instrs MWAITrr)>;

def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> {
  let Latency = 22;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F(32|64)m")>;

def SKXWriteResGroupVEX2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
  let
Latency = 18;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,2,1,1];
}
def: InstRW<[SKXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
                                            VGATHERQPDrm, VPGATHERQQrm,
                                            VGATHERQPSrm, VPGATHERQDrm)>;

// In the VEX4/VEX8 gather writes the SKXPort23 entry of ReleaseAtCycles equals
// the numeric suffix of the def name (4, 8), while NumMicroOps stays at 5.
def SKXWriteResGroupVEX4 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
  let Latency = 20;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,4,1,1];
}
def: InstRW<[SKXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
                                            VGATHERDPSrm,  VPGATHERDDrm,
                                            VGATHERQPDYrm, VPGATHERQQYrm,
                                            VGATHERQPSYrm, VPGATHERQDYrm)>;

def SKXWriteResGroupVEX8 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
  let Latency = 22;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1,8,1,1];
}
def: InstRW<[SKXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;

def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
  let Latency = 22;
  let NumMicroOps = 14;
  let ReleaseAtCycles = [5,5,4];
}
def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
                                              "VPCONFLICTQZ256rr")>;

def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 23;
  let NumMicroOps = 19;
  let ReleaseAtCycles = [2,1,4,1,1,4,6];
}
def: InstRW<[SKXWriteResGroup228], (instrs CMPXCHG16B)>;

def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
  let Latency = 25;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;

def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
  let Latency = 27;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;

// Memory-operand counterpart of the [SKXPort5,SKXPort01,SKXPort015] conflict
// group above: adds one SKXPort23 cycle and one uop (15 vs 14).
def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
  let Latency = 29;
  let NumMicroOps = 15;
  let ReleaseAtCycles = [5,5,1,4];
}
def: InstRW<[SKXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>;

def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
  let Latency = 30;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;

def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> {
  let Latency = 35;
  let NumMicroOps = 23;
  let ReleaseAtCycles = [1,5,3,4,10];
}
def: InstRW<[SKXWriteResGroup247], (instregex "IN(8|16|32)ri",
                                              "IN(8|16|32)rr")>;

def SKXWriteResGroup248 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 35;
  let NumMicroOps = 23;
  let ReleaseAtCycles = [1,5,2,1,4,10];
}
def: InstRW<[SKXWriteResGroup248], (instregex "OUT(8|16|32)ir",
                                              "OUT(8|16|32)rr")>;

def SKXWriteResGroup249 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
  let Latency = 37;
  let NumMicroOps = 21;
  let ReleaseAtCycles = [9,7,5];
}
def: InstRW<[SKXWriteResGroup249], (instregex "VPCONFLICTDZ256rr",
                                              "VPCONFLICTQZrr")>;

def SKXWriteResGroup250 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
  let Latency = 37;
  let NumMicroOps = 31;
  let ReleaseAtCycles = [1,8,1,21];
}
def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64)?")>;

def SKXWriteResGroup252 : SchedWriteRes<[SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort15,SKXPort0156]> {
  let Latency = 40;
  let NumMicroOps = 18;
  let ReleaseAtCycles = [1,1,2,3,1,1,1,8];
}
def: InstRW<[SKXWriteResGroup252], (instrs VMCLEARm)>;

def SKXWriteResGroup253 :
SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> {
  let Latency = 41;
  let NumMicroOps = 39;
  let ReleaseAtCycles = [1,10,1,1,26];
}
def: InstRW<[SKXWriteResGroup253], (instrs XSAVE64)>;

def SKXWriteResGroup254 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
  let Latency = 42;
  let NumMicroOps = 22;
  let ReleaseAtCycles = [2,20];
}
def: InstRW<[SKXWriteResGroup254], (instrs RDTSCP)>;

// Same port list as the XSAVE64 group; one more SKXPort6 cycle (11 vs 10) and
// one more uop (40 vs 39).
def SKXWriteResGroup255 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> {
  let Latency = 42;
  let NumMicroOps = 40;
  let ReleaseAtCycles = [1,11,1,1,26];
}
def: InstRW<[SKXWriteResGroup255], (instrs XSAVE)>;
def: InstRW<[SKXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;

def SKXWriteResGroup256 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
  let Latency = 44;
  let NumMicroOps = 22;
  let ReleaseAtCycles = [9,7,1,5];
}
def: InstRW<[SKXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)",
                                              "VPCONFLICTQZrm(b?)")>;

def SKXWriteResGroup258 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05,SKXPort06,SKXPort0156]> {
  let Latency = 62;
  let NumMicroOps = 64;
  let ReleaseAtCycles = [2,8,5,10,39];
}
def: InstRW<[SKXWriteResGroup258], (instrs FLDENVm)>;

def SKXWriteResGroup259 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 63;
  let NumMicroOps = 88;
  let ReleaseAtCycles = [4,4,31,1,2,1,45];
}
def: InstRW<[SKXWriteResGroup259], (instrs FXRSTOR64)>;

def SKXWriteResGroup260 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> {
  let Latency = 63;
  let NumMicroOps = 90;
  let ReleaseAtCycles = [4,2,33,1,2,1,47];
}
def: InstRW<[SKXWriteResGroup260], (instrs FXRSTOR)>;

def SKXWriteResGroup261 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
  let Latency = 67;
  let NumMicroOps = 35;
  let ReleaseAtCycles = [17,11,7];
}
def: InstRW<[SKXWriteResGroup261], (instregex "VPCONFLICTDZrr")>;

// Memory-operand counterpart of SKXWriteResGroup261: adds one SKXPort23 cycle
// and one uop (36 vs 35).
def SKXWriteResGroup262 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
  let Latency = 74;
  let NumMicroOps = 36;
  let ReleaseAtCycles = [17,11,1,7];
}
def: InstRW<[SKXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>;

def SKXWriteResGroup263 : SchedWriteRes<[SKXPort5,SKXPort05,SKXPort0156]> {
  let Latency = 75;
  let NumMicroOps = 15;
  let ReleaseAtCycles = [6,3,6];
}
def: InstRW<[SKXWriteResGroup263], (instrs FNINIT)>;

def SKXWriteResGroup266 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort237,SKXPort06,SKXPort0156]> {
  let Latency = 106;
  let NumMicroOps = 100;
  let ReleaseAtCycles = [9,1,11,16,1,11,21,30];
}
def: InstRW<[SKXWriteResGroup266], (instrs FSTENVm)>;

def SKXWriteResGroup267 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
  let Latency = 140;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>;

def: InstRW<[WriteZero], (instrs CLC)>;


// Instruction variants handled by the renamer. These might not need execution
// ports in certain conditions.
// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
// section "Skylake Pipeline" > "Register allocation and renaming".
// These can be investigated with llvm-exegesis, e.g.
// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-

// Write that names no processor resources and has zero latency. Every variant
// below selects it when ZeroIdiomPredicate holds and otherwise falls back to
// the generic write listed in its NoSchedPred entry.
def SKXWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// GPR SUB/XOR register-register forms.
def SKXWriteZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteALU]>
]>;
def : InstRW<[SKXWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
                                          XOR32rr, XOR64rr)>;

// FP XOR forms; the X/Y/Z variants differ only in their fallback write
// (WriteFLogic, WriteFLogicY, WriteFLogicZ).
def SKXWriteFZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteFLogic]>
]>;
def : InstRW<[SKXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
                                           XORPDrr, VXORPDrr,
                                           VXORPSZ128rr,
                                           VXORPDZ128rr)>;

def SKXWriteFZeroIdiomY : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteFLogicY]>
]>;
def : InstRW<[SKXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
                                            VXORPSZ256rr, VXORPDZ256rr)>;

def SKXWriteFZeroIdiomZ : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteFLogicZ]>
]>;
def : InstRW<[SKXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>;

// Integer vector XOR forms; fallback writes are WriteVecLogicX/Y/Z.
def SKXWriteVZeroIdiomLogicX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
]>;
def : InstRW<[SKXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
                                                 VPXORDZ128rr, VPXORQZ128rr)>;

def SKXWriteVZeroIdiomLogicY : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
]>;
def : InstRW<[SKXWriteVZeroIdiomLogicY], (instrs VPXORYrr,
                                                 VPXORDZ256rr, VPXORQZ256rr)>;

def SKXWriteVZeroIdiomLogicZ :
SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecLogicZ]>
]>;
def : InstRW<[SKXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>;

// PCMPGT{B,D,W} forms; fallback writes are WriteVecALUX / WriteVecALUY.
def SKXWriteVZeroIdiomALUX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecALUX]>
]>;
def : InstRW<[SKXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr,
                                               PCMPGTDrr, VPCMPGTDrr,
                                               PCMPGTWrr, VPCMPGTWrr)>;

def SKXWriteVZeroIdiomALUY : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecALUY]>
]>;
def : InstRW<[SKXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
                                               VPCMPGTDYrr,
                                               VPCMPGTWYrr)>;

// Fallback write for the PSUB zero-idiom variant: one uop on SKXPort015 with
// latency 1. It is shared by every PSUB width listed below.
def SKXWritePSUB : SchedWriteRes<[SKXPort015]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}

def SKXWriteVZeroIdiomPSUB : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [SKXWritePSUB]>
]>;

def : InstRW<[SKXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
                                               PSUBDrr, VPSUBDrr, VPSUBDZ128rr,
                                               PSUBQrr, VPSUBQrr, VPSUBQZ128rr,
                                               PSUBWrr, VPSUBWrr, VPSUBWZ128rr,
                                               VPSUBBYrr, VPSUBBZ256rr,
                                               VPSUBDYrr, VPSUBDZ256rr,
                                               VPSUBQYrr, VPSUBQZ256rr,
                                               VPSUBWYrr, VPSUBWZ256rr,
                                               VPSUBBZrr,
                                               VPSUBDZrr,
                                               VPSUBQZrr,
                                               VPSUBWZrr)>;

// Fallback write for the PCMPGTQ zero-idiom variant: one uop on SKXPort5 with
// latency 3.
def SKXWritePCMPGTQ : SchedWriteRes<[SKXPort5]> {
  let Latency = 3;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}

def SKXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [SKXWritePCMPGTQ]>
]>;
def : InstRW<[SKXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
                                                  VPCMPGTQYrr)>;


// CMOVs that use both the Z and C flags require an extra uop.
def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}

// Folded-load form: adds one SKXPort23 cycle and one uop to the rr form above.
def SKXWriteCMOVA_CMOVBErm : SchedWriteRes<[SKXPort23,SKXPort06]> {
  let Latency = 7;
  let ReleaseAtCycles = [1,2];
  let NumMicroOps = 3;
}

// Select the two-uop write when the IsCMOVA*/CMOVBE* predicate matches;
// otherwise use the generic WriteCMOV (or its folded-load form).
def SKXCMOVA_CMOVBErr :  SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SKXWriteCMOVA_CMOVBErr]>,
  SchedVar<NoSchedPred,                             [WriteCMOV]>
]>;

def SKXCMOVA_CMOVBErm :  SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SKXWriteCMOVA_CMOVBErm]>,
  SchedVar<NoSchedPred,                             [WriteCMOV.Folded]>
]>;

def : InstRW<[SKXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
def : InstRW<[SKXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;

// SETCCs that use both the Z and C flags require an extra uop.
def SKXWriteSETA_SETBEr : SchedWriteRes<[SKXPort06]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}

// Store form: adds one cycle each on SKXPort4 and SKXPort237 to the register
// form above.
def SKXWriteSETA_SETBEm : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> {
  let Latency = 3;
  let ReleaseAtCycles = [1,1,2];
  let NumMicroOps = 4;
}

def SKXSETA_SETBErr :  SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SKXWriteSETA_SETBEr]>,
  SchedVar<NoSchedPred,                         [WriteSETCC]>
]>;

def SKXSETA_SETBErm :  SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SKXWriteSETA_SETBEm]>,
  SchedVar<NoSchedPred,                         [WriteSETCCStore]>
]>;

def : InstRW<[SKXSETA_SETBErr], (instrs SETCCr)>;
def : InstRW<[SKXSETA_SETBErm], (instrs SETCCm)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// Lists the opcodes treated as zero idioms for this model. Every class below
// is guarded by ZeroIdiomPredicate, the same predicate the zero-idiom
// SchedWriteVariants in this file test.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,

  // SSE Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr,

    // int variants.
    PXORrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX Zero-idioms.
  DepBreakingClass<[
    // xmm fp variants.
    VXORPSrr, VXORPDrr,

    // xmm int variants.
    VPXORrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

    // ymm variants.
    VXORPSYrr, VXORPDYrr, VPXORYrr,
    VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr,

    // zmm variants (this list also holds the Z128- and Z256-suffixed forms).
    VXORPSZrr, VXORPDZrr, VPXORDZrr, VPXORQZrr,
    VXORPSZ128rr, VXORPDZ128rr, VPXORDZ128rr, VPXORQZ128rr,
    VXORPSZ256rr, VXORPDZ256rr, VPXORDZ256rr, VPXORQZ256rr,
    VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
    VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
    VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
  ], ZeroIdiomPredicate>,
]>;

} // SchedModel