//=- X86SchedIceLake.td - X86 Ice Lake Scheduling ------------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Ice Lake to support
// instruction scheduling and other instruction cost heuristics.
//
// TODO: This is mainly a copy of X86SchedSkylakeServer.td, but allows us to
// iteratively improve scheduling handling toward better modelling the
// Ice Lake (Sunny/Cypress Cove) microarchitecture.
//
//===----------------------------------------------------------------------===//

def IceLakeModel : SchedMachineModel {
  // All x86 instructions are modeled as a single micro-op, and Ice Lake can
  // decode 6 instructions per cycle.
  let IssueWidth = 6;
  let MicroOpBufferSize = 352; // Based on the reorder buffer.
  let LoadLatency = 5;
  let MispredictPenalty = 14;

  // Based on the LSD (loop-stream detector) queue size and benchmarking data.
  let LoopMicroOpBufferSize = 50;

  // This flag is set to allow the scheduler to assign a default model to
  // unrecognized opcodes.
  let CompleteModel = 0;
}

// Everything below is attached to IceLakeModel.
let SchedModel = IceLakeModel in {

// Ice Lake can issue micro-ops to 10 different ports (0-9) in one cycle.

// Ports 0, 1, 5, and 6 handle all computation.
// Ports 4 and 9 get the data half of stores. Store data can be available later
// than the store address, but since we don't model the latency of stores, we
// can ignore that.
// Ports 2 and 3 are identical. They handle loads and address calculations.
// Ports 7 and 8 are identical. They handle store address calculations.
// One ProcResource per execution port.
def ICXPort0 : ProcResource<1>;
def ICXPort1 : ProcResource<1>;
def ICXPort2 : ProcResource<1>;
def ICXPort3 : ProcResource<1>;
def ICXPort4 : ProcResource<1>;
def ICXPort5 : ProcResource<1>;
def ICXPort6 : ProcResource<1>;
def ICXPort7 : ProcResource<1>;
def ICXPort8 : ProcResource<1>;
def ICXPort9 : ProcResource<1>;

// Port groups: a micro-op bound to a group may issue on any member port.
def ICXPort01   : ProcResGroup<[ICXPort0, ICXPort1]>;
def ICXPort23   : ProcResGroup<[ICXPort2, ICXPort3]>;
def ICXPort04   : ProcResGroup<[ICXPort0, ICXPort4]>;
def ICXPort05   : ProcResGroup<[ICXPort0, ICXPort5]>;
def ICXPort06   : ProcResGroup<[ICXPort0, ICXPort6]>;
def ICXPort15   : ProcResGroup<[ICXPort1, ICXPort5]>;
def ICXPort16   : ProcResGroup<[ICXPort1, ICXPort6]>;
def ICXPort49   : ProcResGroup<[ICXPort4, ICXPort9]>;
def ICXPort56   : ProcResGroup<[ICXPort5, ICXPort6]>;
def ICXPort78   : ProcResGroup<[ICXPort7, ICXPort8]>;
def ICXPort015  : ProcResGroup<[ICXPort0, ICXPort1, ICXPort5]>;
def ICXPort056  : ProcResGroup<[ICXPort0, ICXPort5, ICXPort6]>;
def ICXPort0156 : ProcResGroup<[ICXPort0, ICXPort1, ICXPort5, ICXPort6]>;

// Integer division issued on port 0.
def ICXDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def ICXFPDivider : ProcResource<1>;

// 60 Entry Unified Scheduler: a group spanning every port, with the buffer
// size set to 60.
let BufferSize = 60 in
def ICXPortAny : ProcResGroup<[ICXPort0, ICXPort1, ICXPort2, ICXPort3,
                               ICXPort4, ICXPort5, ICXPort6, ICXPort7,
                               ICXPort8, ICXPort9]>;

// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available
// until 5 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;

// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be
// available until 5/6/7 cycles after the memory operand.
def : ReadAdvance<ReadAfterVecLd,  5>;
def : ReadAdvance<ReadAfterVecXLd, 6>;
def : ReadAdvance<ReadAfterVecYLd, 7>;

def : ReadAdvance<ReadInt2Fpu, 0>;

// Many SchedWrites come in pairs: a register form and a form with a folded
// load. Folded-load instructions are usually micro-fused, so they only show
// up as two micro-ops once queued in the reservation station.
//
// ICXWriteResPair emits the WriteRes records for both forms of SchedRW:
//   ExePorts - resources used by the register form.
//   Lat      - latency of the register form.
//   Res      - cycles each entry of ExePorts is held (default [1]).
//   UOps     - micro-op count of the register form (default 1).
//   LoadLat  - latency added for the folded-load form (default 5).
//   LoadUOps - micro-ops added for the folded-load form (default 1).
multiclass ICXWriteResPair<X86FoldableSchedWrite SchedRW,
                           list<ProcResourceKind> ExePorts,
                           int Lat, list<int> Res = [1], int UOps = 1,
                           int LoadLat = 5, int LoadUOps = 1> {
  // Register form: holds ExePorts for the cycles given by Res.
  def : WriteRes<SchedRW, ExePorts> {
    let NumMicroOps = UOps;
    let Latency = Lat;
    let ReleaseAtCycles = Res;
  }

  // Folded-load form: additionally holds port 2/3 for one cycle and adds the
  // load's latency and micro-ops on top of the register form.
  def : WriteRes<SchedRW.Folded, !listconcat([ICXPort23], ExePorts)> {
    let NumMicroOps = !add(UOps, LoadUOps);
    let Latency = !add(Lat, LoadLat);
    let ReleaseAtCycles = !listconcat([1], Res);
  }
}

// A folded store needs a cycle on port 4/9 for the store data, and a cycle on
// port 7/8 for the store address.
def : WriteRes<WriteRMW, [ICXPort78, ICXPort49]>;

// Arithmetic.
defm : ICXWriteResPair<WriteALU, [ICXPort0156], 1>; // Simple integer ALU op.
defm : ICXWriteResPair<WriteADC, [ICXPort06],   1>; // Integer ALU + flags op.

// Integer multiplication.
defm : ICXWriteResPair<WriteIMul8,     [ICXPort1], 3>;
defm : ICXWriteResPair<WriteIMul16,    [ICXPort1, ICXPort06, ICXPort0156], 4, [1, 1, 2], 4>;
defm : X86WriteRes<WriteIMul16Imm,     [ICXPort1, ICXPort0156], 4, [1, 1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd,   [ICXPort1, ICXPort0156, ICXPort23], 8, [1, 1, 1], 3>;
defm : X86WriteRes<WriteIMul16Reg,     [ICXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteIMul16RegLd,   [ICXPort1, ICXPort0156, ICXPort23], 8, [1, 1, 1], 3>;
defm : ICXWriteResPair<WriteIMul32,    [ICXPort1, ICXPort06, ICXPort0156], 4, [1, 1, 1], 3>;
defm : ICXWriteResPair<WriteMULX32,    [ICXPort1, ICXPort06, ICXPort0156], 3, [1, 1, 1], 3>;
defm : ICXWriteResPair<WriteIMul32Imm, [ICXPort1], 3>;
defm : ICXWriteResPair<WriteIMul32Reg, [ICXPort1], 3>;
defm : ICXWriteResPair<WriteIMul64,    [ICXPort1, ICXPort5], 4, [1, 1], 2>;
defm : ICXWriteResPair<WriteMULX64,    [ICXPort1, ICXPort5], 3, [1, 1], 2>;
defm : ICXWriteResPair<WriteIMul64Imm, [ICXPort1], 3>;
defm : ICXWriteResPair<WriteIMul64Reg, [ICXPort1], 3>;

// WriteIMulH consumes no resources here and only contributes latency.
def ICXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
// The folded-load form adds this model's own load latency. It previously read
// SkylakeServerModel.LoadLatency, a leftover from the file this model was
// copied from, which tied Ice Lake's latency to a different CPU model.
def : WriteRes<WriteIMulHLd, []> {
  let Latency = !add(ICXWriteIMulH.Latency, IceLakeModel.LoadLatency);
}

defm : X86WriteRes<WriteBSWAP32,    [ICXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64,    [ICXPort06, ICXPort15], 2, [1, 1], 2>;
defm : X86WriteRes<WriteCMPXCHG,    [ICXPort06, ICXPort0156], 5, [2, 3], 5>;
defm : X86WriteRes<WriteCMPXCHGRMW, [ICXPort23, ICXPort06, ICXPort0156, ICXPort78, ICXPort49], 8, [1, 2, 1, 1, 1], 6>;
defm : X86WriteRes<WriteXCHG,       [ICXPort0156], 2, [3], 3>;

// TODO: Why isn't the ICXDivider used?
// Unsigned division. The 16/32/64-bit register forms share one entry, as do
// their folded-load forms.
defm : ICXWriteResPair<WriteDiv8, [ICXPort0, ICXDivider], 25, [1, 10], 1, 4>;
foreach Div = [WriteDiv16, WriteDiv32, WriteDiv64] in
  defm : X86WriteRes<Div, [ICXPort0, ICXPort1, ICXPort5, ICXPort6, ICXPort05, ICXPort0156],
                     76, [7, 2, 8, 3, 1, 11], 32>;
foreach DivLd = [WriteDiv16Ld, WriteDiv32Ld, WriteDiv64Ld] in
  defm : X86WriteRes<DivLd, [ICXPort0, ICXPort23, ICXDivider], 29, [1, 1, 10], 2>;

// Signed division, laid out the same way.
defm : X86WriteRes<WriteIDiv8, [ICXPort0, ICXDivider], 25, [1, 10], 1>;
foreach IDiv = [WriteIDiv16, WriteIDiv32, WriteIDiv64] in
  defm : X86WriteRes<IDiv, [ICXPort0, ICXPort1, ICXPort5, ICXPort6, ICXPort06, ICXPort0156],
                     102, [4, 2, 4, 8, 14, 34], 66>;
foreach IDivLd = [WriteIDiv8Ld, WriteIDiv16Ld, WriteIDiv32Ld, WriteIDiv64Ld] in
  defm : X86WriteRes<IDivLd, [ICXPort0, ICXPort5, ICXPort23, ICXPort0156], 28, [2, 4, 1, 1], 8>;

defm : ICXWriteResPair<WriteCRC32, [ICXPort1], 3>;

// LEA instructions can't fold loads, so there is no paired entry.
def : WriteRes<WriteLEA, [ICXPort15]>;

// Conditional move.
defm : ICXWriteResPair<WriteCMOV, [ICXPort06], 1, [1], 1>;
// x87 conditional move.
defm : X86WriteRes<WriteFCMOV, [ICXPort1], 3, [1], 1>;
// Setcc.
def : WriteRes<WriteSETCC, [ICXPort06]>;
// Setcc with a store destination.
let Latency = 2, NumMicroOps = 3 in
def : WriteRes<WriteSETCCStore, [ICXPort06, ICXPort49, ICXPort78]>;

defm : X86WriteRes<WriteLAHFSAHF,        [ICXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTest,         [ICXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestImmLd,    [ICXPort06, ICXPort23], 6, [1, 1], 2>;
defm : X86WriteRes<WriteBitTestRegLd,    [ICXPort0156, ICXPort23], 6, [1, 1], 2>;
defm : X86WriteRes<WriteBitTestSet,      [ICXPort06], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestSetImmLd, [ICXPort06, ICXPort23], 5, [1, 1], 3>;
defm : X86WriteRes<WriteBitTestSetRegLd, [ICXPort0156, ICXPort23], 5, [1, 1], 2>;

// Integer shifts and rotates.
defm : ICXWriteResPair<WriteShift,    [ICXPort06], 1>;
defm : ICXWriteResPair<WriteShiftCL,  [ICXPort06], 3, [3], 3>;
defm : ICXWriteResPair<WriteRotate,   [ICXPort06], 1, [1], 1>;
defm : ICXWriteResPair<WriteRotateCL, [ICXPort06], 3, [3], 3>;

// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri,  [ICXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl, [ICXPort1, ICXPort06, ICXPort0156], 6, [1, 2, 1], 4>;
defm : X86WriteRes<WriteSHDmri,  [ICXPort1, ICXPort23, ICXPort78, ICXPort0156], 9, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl, [ICXPort1, ICXPort23, ICXPort78, ICXPort06, ICXPort0156], 11, [1, 1, 1, 2, 1], 6>;

// Bit counts.
defm : ICXWriteResPair<WriteBSF,    [ICXPort1], 3>;
defm : ICXWriteResPair<WriteBSR,    [ICXPort1], 3>;
defm : ICXWriteResPair<WriteLZCNT,  [ICXPort1], 3>;
defm : ICXWriteResPair<WriteTZCNT,  [ICXPort1], 3>;
defm : ICXWriteResPair<WritePOPCNT, [ICXPort1], 3>;

// BMI1 BEXTR/BLS, BMI2 BZHI
defm : ICXWriteResPair<WriteBEXTR, [ICXPort06, ICXPort15], 2, [1, 1], 2>;
defm : ICXWriteResPair<WriteBLS,   [ICXPort15], 1>;
defm : ICXWriteResPair<WriteBZHI,  [ICXPort15], 1>;

// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad,    [ICXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteStore,   [ICXPort78, ICXPort49], 1, [1, 1], 1>;
defm : X86WriteRes<WriteStoreNT, [ICXPort78, ICXPort49], 1, [1, 1], 2>;
defm : X86WriteRes<WriteMove,    [ICXPort0156], 1, [1], 1>;

// Model the effect of clobbering the read-write mask operand of the GATHER
// operation. It costs nothing by itself and only has latency, matching that of
// WriteLoad.
defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def : WriteRes<WriteZero, []>;

// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm : ICXWriteResPair<WriteJump, [ICXPort06], 1>;

// Floating point. This covers both scalar and vector operations.
defm : X86WriteRes<WriteFLD0,         [ICXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1,         [ICXPort05], 1, [2], 2>;
defm : X86WriteRes<WriteFLDC,         [ICXPort05], 1, [2], 2>;
defm : X86WriteRes<WriteFLoad,        [ICXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFLoadX,       [ICXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFLoadY,       [ICXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad,  [ICXPort23, ICXPort015], 7, [1, 1], 2>;
defm : X86WriteRes<WriteFMaskedLoadY, [ICXPort23, ICXPort015], 8, [1, 1], 2>;

// All plain and non-temporal FP stores share one entry.
foreach FSt = [WriteFStore, WriteFStoreX, WriteFStoreY,
               WriteFStoreNT, WriteFStoreNTX, WriteFStoreNTY] in
  defm : X86WriteRes<FSt, [ICXPort78, ICXPort49], 1, [1, 1], 2>;

// Masked FP stores additionally use port 0.
foreach FMaskSt = [WriteFMaskedStore32, WriteFMaskedStore32Y,
                   WriteFMaskedStore64, WriteFMaskedStore64Y] in
  defm : X86WriteRes<FMaskSt, [ICXPort78, ICXPort49, ICXPort0], 2, [1, 1, 1], 3>;

defm : X86WriteRes<WriteFMove,  [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveZ, [ICXPort05],  1, [1], 1>;
defm : X86WriteRes<WriteEMMS,   [ICXPort05, ICXPort0156], 10, [9, 1], 10>;

// Floating point add/sub.
defm : ICXWriteResPair<WriteFAdd,    [ICXPort01], 4, [1], 1, 5>;
defm : ICXWriteResPair<WriteFAddX,   [ICXPort01], 4, [1], 1, 6>;
defm : ICXWriteResPair<WriteFAddY,   [ICXPort01], 4, [1], 1, 7>;
defm : ICXWriteResPair<WriteFAddZ,   [ICXPort0],  4, [1], 1, 7>;
// Floating point double add/sub.
defm : ICXWriteResPair<WriteFAdd64,  [ICXPort01], 4, [1], 1, 5>;
defm : ICXWriteResPair<WriteFAdd64X, [ICXPort01], 4, [1], 1, 6>;
defm : ICXWriteResPair<WriteFAdd64Y, [ICXPort01], 4, [1], 1, 7>;
defm : ICXWriteResPair<WriteFAdd64Z, [ICXPort0],  4, [1], 1, 7>;

// Floating point compare.
defm : ICXWriteResPair<WriteFCmp,    [ICXPort01], 4, [1], 1, 5>;
defm : ICXWriteResPair<WriteFCmpX,   [ICXPort01], 4, [1], 1, 6>;
defm : ICXWriteResPair<WriteFCmpY,   [ICXPort01], 4, [1], 1, 7>;
defm : ICXWriteResPair<WriteFCmpZ,   [ICXPort05], 4, [1], 1, 7>;
// Floating point double compare.
defm : ICXWriteResPair<WriteFCmp64,  [ICXPort01], 4, [1], 1, 5>;
defm : ICXWriteResPair<WriteFCmp64X, [ICXPort01], 4, [1], 1, 6>;
defm : ICXWriteResPair<WriteFCmp64Y, [ICXPort01], 4, [1], 1, 7>;
defm : ICXWriteResPair<WriteFCmp64Z, [ICXPort05], 4, [1], 1, 7>;

// Floating point compare to flags.
defm : ICXWriteResPair<WriteFCom,  [ICXPort0], 2>; // X87.
defm : ICXWriteResPair<WriteFComX, [ICXPort0], 2>; // SSE.

// Floating point multiplication.
defm : ICXWriteResPair<WriteFMul,    [ICXPort01], 4, [1], 1, 5>;
defm : ICXWriteResPair<WriteFMulX,   [ICXPort01], 4, [1], 1, 6>;
defm : ICXWriteResPair<WriteFMulY,   [ICXPort01], 4, [1], 1, 7>;
defm : ICXWriteResPair<WriteFMulZ,   [ICXPort0],  4, [1], 1, 7>;
// Floating point double multiplication.
defm : ICXWriteResPair<WriteFMul64,  [ICXPort01], 4, [1], 1, 5>;
defm : ICXWriteResPair<WriteFMul64X, [ICXPort01], 4, [1], 1, 6>;
defm : ICXWriteResPair<WriteFMul64Y, [ICXPort01], 4, [1], 1, 7>;
defm : ICXWriteResPair<WriteFMul64Z, [ICXPort0],  4, [1], 1, 7>;

// Floating point division. ICXFPDivider is held for the listed cycles in
// addition to the issue port(s).
defm : ICXWriteResPair<WriteFDiv,    [ICXPort0, ICXFPDivider], 11, [1, 3], 1, 5>; // 10-14 cycles.
defm : ICXWriteResPair<WriteFDivX,   [ICXPort0, ICXFPDivider], 11, [1, 3], 1, 6>; // 10-14 cycles.
defm : ICXWriteResPair<WriteFDivY,   [ICXPort0, ICXFPDivider], 11, [1, 5], 1, 7>; // 10-14 cycles.
defm : ICXWriteResPair<WriteFDivZ,   [ICXPort0, ICXPort5, ICXFPDivider], 18, [2, 1, 10], 3, 7>;
// Floating point double division.
defm : ICXWriteResPair<WriteFDiv64,  [ICXPort0, ICXFPDivider], 14, [1, 4], 1, 5>; // 10-14 cycles.
defm : ICXWriteResPair<WriteFDiv64X, [ICXPort0, ICXFPDivider], 14, [1, 4], 1, 6>; // 10-14 cycles.
defm : ICXWriteResPair<WriteFDiv64Y, [ICXPort0, ICXFPDivider], 14, [1, 8], 1, 7>; // 10-14 cycles.
defm : ICXWriteResPair<WriteFDiv64Z, [ICXPort0, ICXPort5, ICXFPDivider], 23, [2, 1, 16], 3, 7>;

// Floating point square root.
defm : ICXWriteResPair<WriteFSqrt,   [ICXPort0, ICXFPDivider], 12, [1, 3], 1, 5>;
defm : ICXWriteResPair<WriteFSqrtX,  [ICXPort0, ICXFPDivider], 12, [1, 3], 1, 6>;
defm : ICXWriteResPair<WriteFSqrtY,  [ICXPort0, ICXFPDivider], 12, [1, 6], 1, 7>;
defm : ICXWriteResPair<WriteFSqrtZ,  [ICXPort0, ICXPort5, ICXFPDivider], 20, [2, 1, 12], 3, 7>;
// Floating point double square root.
defm : ICXWriteResPair<WriteFSqrt64, [ICXPort0, ICXFPDivider], 18, [1, 6], 1, 5>;
defm : ICXWriteResPair<WriteFSqrt64X, [ICXPort0, ICXFPDivider], 18, [1, 6],  1, 6>;
defm : ICXWriteResPair<WriteFSqrt64Y, [ICXPort0, ICXFPDivider], 18, [1, 12], 1, 7>;
defm : ICXWriteResPair<WriteFSqrt64Z, [ICXPort0, ICXPort5, ICXFPDivider], 32, [2, 1, 24], 3, 7>;
// Floating point long double square root.
defm : ICXWriteResPair<WriteFSqrt80,  [ICXPort0, ICXFPDivider], 21, [1, 7]>;

// Floating point reciprocal estimate.
defm : ICXWriteResPair<WriteFRcp,  [ICXPort0], 4, [1], 1, 5>;
defm : ICXWriteResPair<WriteFRcpX, [ICXPort0], 4, [1], 1, 6>;
defm : ICXWriteResPair<WriteFRcpY, [ICXPort0], 4, [1], 1, 7>;
defm : ICXWriteResPair<WriteFRcpZ, [ICXPort0, ICXPort5], 4, [2, 1], 3, 7>;

// Floating point reciprocal square root estimate.
defm : ICXWriteResPair<WriteFRsqrt,  [ICXPort0], 4, [1], 1, 5>;
defm : ICXWriteResPair<WriteFRsqrtX, [ICXPort0], 4, [1], 1, 6>;
defm : ICXWriteResPair<WriteFRsqrtY, [ICXPort0], 4, [1], 1, 7>;
defm : ICXWriteResPair<WriteFRsqrtZ, [ICXPort0, ICXPort5], 9, [2, 1], 3, 7>;

// Fused Multiply Add.
defm : ICXWriteResPair<WriteFMA,  [ICXPort01], 4, [1], 1, 5>;
defm : ICXWriteResPair<WriteFMAX, [ICXPort01], 4, [1], 1, 6>;
defm : ICXWriteResPair<WriteFMAY, [ICXPort01], 4, [1], 1, 7>;
defm : ICXWriteResPair<WriteFMAZ, [ICXPort0],  4, [1], 1, 7>;
// Floating point dot products (double, then single).
defm : ICXWriteResPair<WriteDPPD,  [ICXPort5, ICXPort015], 9,  [1, 2], 3, 6>;
defm : ICXWriteResPair<WriteDPPS,  [ICXPort5, ICXPort015], 13, [1, 3], 4, 6>;
defm : ICXWriteResPair<WriteDPPSY, [ICXPort5, ICXPort015], 13, [1, 3], 4, 7>;
// Floating point fabs/fchs.
defm : ICXWriteResPair<WriteFSign, [ICXPort0], 1>;
// Floating point rounding.
defm : ICXWriteResPair<WriteFRnd,  [ICXPort01], 8, [2], 2, 6>;
defm : ICXWriteResPair<WriteFRndY, [ICXPort01], 8, [2], 2, 7>;
defm : ICXWriteResPair<WriteFRndZ, [ICXPort05], 8, [2], 2, 7>;
// Floating point and/or/xor logicals.
defm : ICXWriteResPair<WriteFLogic, [ICXPort015], 1, [1], 1, 6>;
defm : ICXWriteResPair<WriteFLogicY, [ICXPort015], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteFLogicZ, [ICXPort05],  1, [1], 1, 7>;
// Floating point TEST instructions.
defm : ICXWriteResPair<WriteFTest,  [ICXPort0], 2, [1], 1, 6>;
defm : ICXWriteResPair<WriteFTestY, [ICXPort0], 2, [1], 1, 7>;
defm : ICXWriteResPair<WriteFTestZ, [ICXPort0], 2, [1], 1, 7>;
// Floating point vector shuffles.
defm : ICXWriteResPair<WriteFShuffle,  [ICXPort15], 1, [1], 1, 6>;
defm : ICXWriteResPair<WriteFShuffleY, [ICXPort15], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteFShuffleZ, [ICXPort5],  1, [1], 1, 7>;
// Floating point vector variable shuffles.
defm : ICXWriteResPair<WriteFVarShuffle,  [ICXPort15], 1, [1], 1, 6>;
defm : ICXWriteResPair<WriteFVarShuffleY, [ICXPort15], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteFVarShuffleZ, [ICXPort5],  1, [1], 1, 7>;
// Floating point vector blends.
defm : ICXWriteResPair<WriteFBlend,  [ICXPort015], 1, [1], 1, 6>;
defm : ICXWriteResPair<WriteFBlendY, [ICXPort015], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteFBlendZ, [ICXPort015], 1, [1], 1, 7>;
// Fp vector variable blends.
defm : ICXWriteResPair<WriteFVarBlend,  [ICXPort015], 2, [2], 2, 6>;
defm : ICXWriteResPair<WriteFVarBlendY, [ICXPort015], 2, [2], 2, 7>;
defm : ICXWriteResPair<WriteFVarBlendZ, [ICXPort015], 2, [2], 2, 7>;

// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }

// Vector integer operations.
defm : X86WriteRes<WriteVecLoad,        [ICXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX,       [ICXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY,       [ICXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecLoadNT,      [ICXPort23, ICXPort015], 7, [1, 1], 2>;
defm : X86WriteRes<WriteVecLoadNTY,     [ICXPort23, ICXPort015], 8, [1, 1], 2>;
defm : X86WriteRes<WriteVecMaskedLoad,  [ICXPort23, ICXPort015], 7, [1, 1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [ICXPort23, ICXPort015], 8, [1, 1], 2>;

// All plain and non-temporal vector stores share one entry.
foreach VSt = [WriteVecStore, WriteVecStoreX, WriteVecStoreY,
               WriteVecStoreNT, WriteVecStoreNTY] in
  defm : X86WriteRes<VSt, [ICXPort78, ICXPort49], 1, [1, 1], 2>;

// Masked vector stores additionally use port 0.
foreach VMaskSt = [WriteVecMaskedStore32, WriteVecMaskedStore32Y,
                   WriteVecMaskedStore64, WriteVecMaskedStore64Y] in
  defm : X86WriteRes<VMaskSt, [ICXPort78, ICXPort49, ICXPort0], 2, [1, 1, 1], 3>;

defm : X86WriteRes<WriteVecMove,        [ICXPort05],  1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX,       [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY,       [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveZ,       [ICXPort05],  1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr,   [ICXPort0],   2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [ICXPort5],   1, [1], 1>;

// Vector integer ALU op, no logicals.
defm : ICXWriteResPair<WriteVecALU, [ICXPort05], 1, [1], 1, 5>;
defm : ICXWriteResPair<WriteVecALUX, [ICXPort01], 1, [1], 1, 6>;
defm : ICXWriteResPair<WriteVecALUY, [ICXPort01], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteVecALUZ, [ICXPort0],  1, [1], 1, 7>;
// Vector integer and/or/xor.
defm : ICXWriteResPair<WriteVecLogic,  [ICXPort05],  1, [1], 1, 5>;
defm : ICXWriteResPair<WriteVecLogicX, [ICXPort015], 1, [1], 1, 6>;
defm : ICXWriteResPair<WriteVecLogicY, [ICXPort015], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteVecLogicZ, [ICXPort05],  1, [1], 1, 7>;
// Vector integer TEST instructions.
defm : ICXWriteResPair<WriteVecTest,  [ICXPort0, ICXPort5], 3, [1, 1], 2, 6>;
defm : ICXWriteResPair<WriteVecTestY, [ICXPort0, ICXPort5], 3, [1, 1], 2, 7>;
defm : ICXWriteResPair<WriteVecTestZ, [ICXPort0, ICXPort5], 3, [1, 1], 2, 7>;
// Vector integer multiply.
defm : ICXWriteResPair<WriteVecIMul,  [ICXPort0],  5, [1], 1, 5>;
defm : ICXWriteResPair<WriteVecIMulX, [ICXPort01], 5, [1], 1, 6>;
defm : ICXWriteResPair<WriteVecIMulY, [ICXPort01], 5, [1], 1, 7>;
defm : ICXWriteResPair<WriteVecIMulZ, [ICXPort05], 5, [1], 1, 7>;
// Vector PMULLD.
defm : ICXWriteResPair<WritePMULLD,  [ICXPort01], 10, [2], 2, 6>;
defm : ICXWriteResPair<WritePMULLDY, [ICXPort01], 10, [2], 2, 7>;
defm : ICXWriteResPair<WritePMULLDZ, [ICXPort05], 10, [2], 2, 7>;
// Vector shuffles.
defm : ICXWriteResPair<WriteShuffle,  [ICXPort5],  1, [1], 1, 5>;
defm : ICXWriteResPair<WriteShuffleX, [ICXPort15], 1, [1], 1, 6>;
defm : ICXWriteResPair<WriteShuffleY, [ICXPort15], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteShuffleZ, [ICXPort5],  1, [1], 1, 7>;
// Vector variable shuffles.
defm : ICXWriteResPair<WriteVarShuffle, [ICXPort0, ICXPort5], 1, [1, 1], 2, 5>;
defm : ICXWriteResPair<WriteVarShuffleX, [ICXPort15], 1, [1], 1, 6>;
defm : ICXWriteResPair<WriteVarShuffleY, [ICXPort15], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteVarShuffleZ, [ICXPort5],  1, [1], 1, 7>;
// Vector blends.
defm : ICXWriteResPair<WriteBlend,  [ICXPort15], 1, [1], 1, 6>;
defm : ICXWriteResPair<WriteBlendY, [ICXPort15], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteBlendZ, [ICXPort15], 1, [1], 1, 7>;
// Vector variable blends.
defm : ICXWriteResPair<WriteVarBlend,  [ICXPort015], 2, [2], 2, 6>;
defm : ICXWriteResPair<WriteVarBlendY, [ICXPort015], 2, [2], 2, 6>;
defm : ICXWriteResPair<WriteVarBlendZ, [ICXPort05],  2, [1], 1, 6>;
// Vector MPSAD.
defm : ICXWriteResPair<WriteMPSAD,  [ICXPort15, ICXPort5], 4, [1, 1], 2, 6>;
defm : ICXWriteResPair<WriteMPSADY, [ICXPort15, ICXPort5], 4, [1, 1], 2, 7>;
defm : ICXWriteResPair<WriteMPSADZ, [ICXPort15, ICXPort5], 4, [1, 1], 2, 7>;
// Vector PSADBW.
defm : ICXWriteResPair<WritePSADBW,  [ICXPort5], 3, [1], 1, 5>;
defm : ICXWriteResPair<WritePSADBWX, [ICXPort5], 3, [1], 1, 6>;
defm : ICXWriteResPair<WritePSADBWY, [ICXPort5], 3, [1], 1, 7>;
// TODO: 512-bit ops require ports 0/1 to be joined.
defm : ICXWriteResPair<WritePSADBWZ, [ICXPort5], 3, [1], 1, 7>;
// Vector PHMINPOS.
defm : ICXWriteResPair<WritePHMINPOS, [ICXPort0], 4, [1], 1, 6>;

// Vector integer shifts.
defm : ICXWriteResPair<WriteVecShift, [ICXPort0], 1, [1], 1, 5>;
defm : X86WriteRes<WriteVecShiftX,   [ICXPort01, ICXPort15], 2, [1, 1], 2>;
defm : X86WriteRes<WriteVecShiftY,   [ICXPort01, ICXPort5],  4, [1, 1], 2>;
defm : X86WriteRes<WriteVecShiftZ,   [ICXPort0, ICXPort5],   4, [1, 1], 2>;
defm : X86WriteRes<WriteVecShiftXLd, [ICXPort01, ICXPort23], 7, [1, 1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [ICXPort01, ICXPort23], 8, [1, 1], 2>;
defm : X86WriteRes<WriteVecShiftZLd, [ICXPort0, ICXPort23],  8, [1, 1], 2>;

// Vector integer immediate shifts.
defm : ICXWriteResPair<WriteVecShiftImm,  [ICXPort0],  1, [1], 1, 5>;
defm : ICXWriteResPair<WriteVecShiftImmX, [ICXPort01], 1, [1], 1, 6>;
defm : ICXWriteResPair<WriteVecShiftImmY, [ICXPort01], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteVecShiftImmZ, [ICXPort0],  1, [1], 1, 7>;
// Variable vector shifts.
defm : ICXWriteResPair<WriteVarVecShift,  [ICXPort01], 1, [1], 1, 6>;
defm : ICXWriteResPair<WriteVarVecShiftY, [ICXPort01], 1, [1], 1, 7>;
defm : ICXWriteResPair<WriteVarVecShiftZ, [ICXPort0],  1, [1], 1, 7>;

// Vector insert/extract operations.
let NumMicroOps = 2 in {
  let Latency = 2 in
  def : WriteRes<WriteVecInsert, [ICXPort15, ICXPort5]>;
  let Latency = 6 in
  def : WriteRes<WriteVecInsertLd, [ICXPort15, ICXPort23]>;

  let Latency = 3 in
  def : WriteRes<WriteVecExtract, [ICXPort0, ICXPort15]>;
}
let Latency = 2, NumMicroOps = 3 in
def : WriteRes<WriteVecExtractSt, [ICXPort49, ICXPort15, ICXPort78]>;

// Conversion between integer and float.
defm : ICXWriteResPair<WriteCvtSS2I, [ICXPort01], 6, [2], 2>; // Needs more work: DD vs DQ.
defm : ICXWriteResPair<WriteCvtPS2I,  [ICXPort01], 3>;
defm : ICXWriteResPair<WriteCvtPS2IY, [ICXPort01], 3>;
defm : ICXWriteResPair<WriteCvtPS2IZ, [ICXPort05], 3>;
defm : ICXWriteResPair<WriteCvtSD2I,  [ICXPort01], 6, [2], 2>;
defm : ICXWriteResPair<WriteCvtPD2I,  [ICXPort01], 3>;
defm : ICXWriteResPair<WriteCvtPD2IY, [ICXPort01], 3>;
defm : ICXWriteResPair<WriteCvtPD2IZ, [ICXPort05], 3>;

defm : ICXWriteResPair<WriteCvtI2SS,  [ICXPort1],  4>;
defm : ICXWriteResPair<WriteCvtI2PS,  [ICXPort01], 4>;
defm : ICXWriteResPair<WriteCvtI2PSY, [ICXPort01], 4>;
defm : ICXWriteResPair<WriteCvtI2PSZ, [ICXPort05], 4>; // Needs more work: DD vs DQ.
defm : ICXWriteResPair<WriteCvtI2SD,  [ICXPort1],  4>;
defm : ICXWriteResPair<WriteCvtI2PD,  [ICXPort01], 4>;
defm : ICXWriteResPair<WriteCvtI2PDY, [ICXPort01], 4>;
defm : ICXWriteResPair<WriteCvtI2PDZ, [ICXPort05], 4>;

defm : ICXWriteResPair<WriteCvtSS2SD,  [ICXPort1], 3>;
defm : ICXWriteResPair<WriteCvtPS2PD,  [ICXPort1], 3>;
defm : ICXWriteResPair<WriteCvtPS2PDY, [ICXPort5, ICXPort01], 3, [1, 1], 2>;
defm : ICXWriteResPair<WriteCvtPS2PDZ, [ICXPort05], 3, [2], 2>;
defm : ICXWriteResPair<WriteCvtSD2SS,  [ICXPort5, ICXPort01], 5, [1, 1], 2, 5>;
defm : ICXWriteResPair<WriteCvtPD2PS,  [ICXPort5, ICXPort01], 5, [1, 1], 2, 6>;
defm : ICXWriteResPair<WriteCvtPD2PSY, [ICXPort5, ICXPort01], 7, [1, 1], 2, 7>;
defm : ICXWriteResPair<WriteCvtPD2PSZ, [ICXPort5, ICXPort0],  7, [1, 1], 2, 7>;

// Half-precision to single-precision conversions.
defm : X86WriteRes<WriteCvtPH2PS,    [ICXPort5, ICXPort01],  5,  [1, 1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY,   [ICXPort5, ICXPort01],  7,  [1, 1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ,   [ICXPort5, ICXPort0],   7,  [1, 1], 2>;
defm : X86WriteRes<WriteCvtPH2PSLd,  [ICXPort23, ICXPort01], 9,  [1, 1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [ICXPort23, ICXPort01], 10, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [ICXPort23, ICXPort05], 10, [1, 1], 2>;

// Single-precision to half-precision conversions.
defm : X86WriteRes<WriteCvtPS2PH,    [ICXPort5, ICXPort01], 5, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY,   [ICXPort5, ICXPort01], 7, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPS2PHZ,   [ICXPort5, ICXPort05], 7, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPS2PHSt,  [ICXPort49, ICXPort5, ICXPort78, ICXPort01], 6, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [ICXPort49, ICXPort5, ICXPort78, ICXPort01], 8, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [ICXPort49, ICXPort5, ICXPort78, ICXPort05], 8, [1, 1, 1, 1], 4>;

// Strings instructions.

// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [ICXPort0]> {
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
  let Latency = 10;
}
def : WriteRes<WritePCmpIStrMLd, [ICXPort0, ICXPort23]> {
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3, 1];
  let Latency = 16;
}

// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [ICXPort0, ICXPort5, ICXPort015, ICXPort0156]> {
  let NumMicroOps = 9;
  let ReleaseAtCycles = [4, 3, 1, 1];
  let Latency = 19;
}
def : WriteRes<WritePCmpEStrMLd, [ICXPort0, ICXPort5, ICXPort23, ICXPort015, ICXPort0156]> {
  let NumMicroOps = 10;
  let ReleaseAtCycles = [4, 3, 1, 1, 1];
  let Latency = 25;
}

// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [ICXPort0]> {
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
  let Latency = 10;
}
def : WriteRes<WritePCmpIStrILd, [ICXPort0, ICXPort23]> {
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3, 1];
  let Latency = 16;
}

// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [ICXPort0, ICXPort5, ICXPort0156]> {
  let NumMicroOps = 8;
  let ReleaseAtCycles = [4, 3, 1];
  let Latency = 18;
}
def : WriteRes<WritePCmpEStrILd, [ICXPort0, ICXPort5, ICXPort23, ICXPort0156]> {
  let NumMicroOps = 9;
  let ReleaseAtCycles = [4, 3, 1, 1];
  let Latency = 24;
}

// MOVMSK Instructions: all issue on port 0 with a 2-cycle latency.
let Latency = 2 in {
  def : WriteRes<WriteFMOVMSK,    [ICXPort0]>;
  def : WriteRes<WriteVecMOVMSK,  [ICXPort0]>;
  def : WriteRes<WriteVecMOVMSKY, [ICXPort0]>;
  def : WriteRes<WriteMMXMOVMSK,  [ICXPort0]>;
}

// AES instructions.
// Decryption, encryption.
def : WriteRes<WriteAESDecEnc, [ICXPort0]> {
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
  let Latency = 4;
}
def : WriteRes<WriteAESDecEncLd, [ICXPort0, ICXPort23]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1, 1];
  let Latency = 10;
}

// InvMixColumn.
def : WriteRes<WriteAESIMC, [ICXPort0]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
  let Latency = 8;
}
def : WriteRes<WriteAESIMCLd, [ICXPort0, ICXPort23]> {
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2, 1];
  let Latency = 14;
}

// Key Generation.
def : WriteRes<WriteAESKeyGen, [ICXPort0, ICXPort5, ICXPort015]> {
  let NumMicroOps = 11;
  let ReleaseAtCycles = [3, 6, 2];
  let Latency = 20;
}
def : WriteRes<WriteAESKeyGenLd, [ICXPort0, ICXPort5, ICXPort23, ICXPort015]> {
  let NumMicroOps = 11;
  let ReleaseAtCycles = [3, 6, 1, 1];
  let Latency = 25;
}

// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [ICXPort5]> {
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
  let Latency = 6;
}
def : WriteRes<WriteCLMulLd, [ICXPort5, ICXPort23]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1, 1];
  let Latency = 12;
}

// Catch-all for expensive system instructions.
def : WriteRes<WriteSystem, [ICXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;

// AVX2.
// Fp 256-bit width vector shuffles.
defm : ICXWriteResPair<WriteFShuffle256, [ICXPort5], 3, [1], 1, 7>;
// Fp 256-bit width vector variable shuffles.
defm : ICXWriteResPair<WriteFVarShuffle256, [ICXPort5], 3, [1], 1, 7>;
// 256-bit width vector shuffles.
defm : ICXWriteResPair<WriteShuffle256,     [ICXPort5], 3, [1], 1, 7>;
// 256-bit width packed vector width-changing move.
defm : ICXWriteResPair<WriteVPMOV256,       [ICXPort5], 3, [1], 1, 7>;
// 256-bit width vector variable shuffles.
defm : ICXWriteResPair<WriteVarShuffle256,  [ICXPort5], 3, [1], 1, 7>;

// Old microcoded instructions that nobody uses.
def : WriteRes<WriteMicrocoded, [ICXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;

// Fence instructions.
def : WriteRes<WriteFence, [ICXPort78, ICXPort49]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1, 1];
}

// Load/store MXCSR.
def : WriteRes<WriteLDMXCSR, [ICXPort0, ICXPort23, ICXPort0156]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1, 1, 1];
}
def : WriteRes<WriteSTMXCSR, [ICXPort49, ICXPort5, ICXPort78]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1, 1, 1];
}

// Nop: not very useful, except that it provides a model for nops.
// It is bound to an empty resource list.
def : WriteRes<WriteNop, []>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

defm : ICXWriteResPair<WriteFHAdd,  [ICXPort5, ICXPort01],   6, [2, 1], 3, 6>;
defm : ICXWriteResPair<WriteFHAddY, [ICXPort5, ICXPort01],   6, [2, 1], 3, 7>;
defm : ICXWriteResPair<WritePHAdd,  [ICXPort5, ICXPort05],   3, [2, 1], 3, 5>;
defm : ICXWriteResPair<WritePHAddX, [ICXPort15, ICXPort015], 3, [2, 1], 3, 6>;
defm : ICXWriteResPair<WritePHAddY, [ICXPort15, ICXPort015], 3, [2, 1], 3, 7>;

// Remaining instrs.
// Per-instruction overrides. Each SchedWriteRes names the resources it uses and
// gives one ReleaseAtCycles entry per listed resource; the InstRW record(s)
// that follow bind opcodes (by name or by regex) to that group.

def ICXWriteResGroup1 : SchedWriteRes<[ICXPort0]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup1], (instregex
    "KAND(B|D|Q|W)kk",
    "KANDN(B|D|Q|W)kk",
    "KMOV(B|D|Q|W)kk",
    "KNOT(B|D|Q|W)kk",
    "KOR(B|D|Q|W)kk",
    "KXNOR(B|D|Q|W)kk",
    "KXOR(B|D|Q|W)kk",
    "KSET0(B|D|Q|W)", // Same as KXOR
    "KSET1(B|D|Q|W)", // Same as KXNOR
    "MMX_PADDS(B|W)rr", "MMX_PADDUS(B|W)rr",
    "MMX_PAVG(B|W)rr",
    "MMX_PCMPEQ(B|D|W)rr", "MMX_PCMPGT(B|D|W)rr",
    "MMX_P(MAX|MIN)SWrr", "MMX_P(MAX|MIN)UBrr",
    "MMX_PSUBS(B|W)rr", "MMX_PSUBUS(B|W)rr",
    "VPMOVB2M(Z|Z128|Z256)kr", "VPMOVD2M(Z|Z128|Z256)kr",
    "VPMOVQ2M(Z|Z128|Z256)kr", "VPMOVW2M(Z|Z128|Z256)kr")>;

def ICXWriteResGroup3 : SchedWriteRes<[ICXPort5]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup3], (instregex
    "COM(P?)_FST0r",
    "KMOV(B|D|Q|W)kr",
    "UCOM_F(P?)r",
    "VPBROADCAST(D|Q)rr",
    "(V?)INSERTPS(Z?)rr",
    "(V?)MOV(HL|LH)PS(Z?)rr",
    "(V?)MOVDDUP(Y|Z128|Z256)?rr",
    "(V?)PALIGNR(Y|Z128|Z256)?rri",
    "(V?)PERMIL(PD|PS)(Y|Z128|Z256)?ri",
    "(V?)PERMIL(PD|PS)(Y|Z128|Z256)?rr",
    "(V?)UNPCK(L|H)(PD|PS)(Y|Z128|Z256)?rr")>;

def ICXWriteResGroup4 : SchedWriteRes<[ICXPort6]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup4], (instregex "JMP(16|32|64)r")>;

def ICXWriteResGroup6 : SchedWriteRes<[ICXPort05]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup6], (instrs FINCSTP, FNOP)>;

def ICXWriteResGroup7 : SchedWriteRes<[ICXPort06]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;

def ICXWriteResGroup8 : SchedWriteRes<[ICXPort15]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup8], (instregex "ANDN(32|64)rr")>;

def ICXWriteResGroup9 : SchedWriteRes<[ICXPort015]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup9], (instregex
    "VBLENDMPD(Z128|Z256)rr",
    "VBLENDMPS(Z128|Z256)rr",
    "VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
    "(V?)PADD(B|D|Q|W)rr",
    "(V?)MOV(SD|SS)(Z?)rr",
    "VPBLENDD(Y?)rri",
    "VPBLENDMB(Z128|Z256)rr",
    "VPBLENDMD(Z128|Z256)rr",
    "VPBLENDMQ(Z128|Z256)rr",
    "VPBLENDMW(Z128|Z256)rr",
    "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk",
    "VPTERNLOGD(Z|Z128|Z256)rri",
    "VPTERNLOGQ(Z|Z128|Z256)rri")>;

def ICXWriteResGroup10 : SchedWriteRes<[ICXPort0156]> {
  let Latency = 1; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup10], (instrs SGDT64m, SIDT64m, SMSW16m, STRm, SYSCALL)>;

def ICXWriteResGroup11 : SchedWriteRes<[ICXPort49, ICXPort78]> {
  let Latency = 1; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
def : InstRW<[ICXWriteResGroup11], (instregex
    "KMOV(B|D|Q|W)mk",
    "ST_FP(32|64|80)m")>;

def ICXWriteResGroup13 : SchedWriteRes<[ICXPort5]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def : InstRW<[ICXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;

def ICXWriteResGroup14 : SchedWriteRes<[ICXPort05]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def : InstRW<[ICXWriteResGroup14], (instrs FDECSTP, MMX_MOVDQ2Qrr)>;

def ICXWriteResGroup17 : SchedWriteRes<[ICXPort0156]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def : InstRW<[ICXWriteResGroup17], (instrs LFENCE, WAIT, XGETBV)>;

def ICXWriteResGroup20 : SchedWriteRes<[ICXPort6, ICXPort0156]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup20], (instregex "CLFLUSH")>;

def ICXWriteResGroup23 : SchedWriteRes<[ICXPort06, ICXPort0156]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup23], (instrs
    CWD,
    JCXZ, JECXZ, JRCXZ,
    ADC8i8, SBB8i8,
    ADC16i16, SBB16i16,
    ADC32i32, SBB32i32,
    ADC64i32, SBB64i32)>;

def ICXWriteResGroup25 : SchedWriteRes<[ICXPort49, ICXPort6, ICXPort78]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[ICXWriteResGroup25], (instrs FNSTCW16m)>;

def ICXWriteResGroup27 : SchedWriteRes<[ICXPort49, ICXPort78, ICXPort15]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[ICXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;

def ICXWriteResGroup28 : SchedWriteRes<[ICXPort49, ICXPort78, ICXPort0156]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[ICXWriteResGroup28], (instrs
    PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
    STOSB, STOSL, STOSQ, STOSW)>;
def : InstRW<[ICXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;

def ICXWriteResGroup29 : SchedWriteRes<[ICXPort49, ICXPort78, ICXPort15]> {
  let Latency = 2; let NumMicroOps = 5; let ReleaseAtCycles = [2, 2, 1];
}
def : InstRW<[ICXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>;

def ICXWriteResGroup30 : SchedWriteRes<[ICXPort0]> {
  let Latency = 3; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup30], (instregex
    "KMOV(B|D|Q|W)rk",
    "KORTEST(B|D|Q|W)kk",
    "KTEST(B|D|Q|W)kk")>;

def ICXWriteResGroup31 : SchedWriteRes<[ICXPort1]> {
  let Latency = 3; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup31], (instregex
    "PDEP(32|64)rr",
    "PEXT(32|64)rr")>;

def ICXWriteResGroup32 : SchedWriteRes<[ICXPort5]> {
  let Latency = 3; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup32], (instregex
    "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
    "VALIGND(Z|Z128|Z256)rri",
    "VALIGNQ(Z|Z128|Z256)rri",
    "VPBROADCAST(B|W)rr",
    "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z128|Z256)?rr",
    "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;

def ICXWriteResGroup33 : SchedWriteRes<[ICXPort5]> {
  let Latency = 4; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup33], (instregex
    "KADD(B|D|Q|W)kk",
    "KSHIFTL(B|D|Q|W)ki",
    "KSHIFTR(B|D|Q|W)ki",
    "KUNPCK(BW|DQ|WD)kk",
    "VCMPPD(Z|Z128|Z256)rri",
    "VCMPPS(Z|Z128|Z256)rri",
    "VCMP(SD|SS)Zrr",
    "VFPCLASS(PD|PS)(Z|Z128|Z256)ri",
    "VFPCLASS(SD|SS)Zri",
    "VPCMPB(Z|Z128|Z256)rri",
    "VPCMPD(Z|Z128|Z256)rri",
    "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
    "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
    "VPCMPQ(Z|Z128|Z256)rri",
    "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
    "VPCMPW(Z|Z128|Z256)rri",
    "VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;

def ICXWriteResGroup34 : SchedWriteRes<[ICXPort0, ICXPort0156]> {
  let Latency = 3; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup34], (instrs FNSTSW16r)>;

def ICXWriteResGroup36 : SchedWriteRes<[ICXPort0, ICXPort5]> {
  let Latency = 3; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup36], (instregex "(V?)EXTRACTPS(Z?)rri")>;

def ICXWriteResGroup37 : SchedWriteRes<[ICXPort0, ICXPort5]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [1, 2];
}
def : InstRW<[ICXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;

def ICXWriteResGroup38 : SchedWriteRes<[ICXPort15, ICXPort01]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [2, 1];
}
def : InstRW<[ICXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;

def ICXWriteResGroup41 : SchedWriteRes<[ICXPort5]> {
  let Latency = 4; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def : InstRW<[ICXWriteResGroup41], (instrs MMX_PACKSSDWrr, MMX_PACKSSWBrr, MMX_PACKUSWBrr)>;

def ICXWriteResGroup42 : SchedWriteRes<[ICXPort6, ICXPort0156]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [1, 2];
}
def : InstRW<[ICXWriteResGroup42], (instregex "CLD")>;

def ICXWriteResGroup44 : SchedWriteRes<[ICXPort06, ICXPort0156]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1, 2];
}
def : InstRW<[ICXWriteResGroup44], (instrs
    RCL8r1, RCL16r1, RCL32r1, RCL64r1,
    RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;

def ICXWriteResGroup44b : SchedWriteRes<[ICXPort1, ICXPort06, ICXPort0156]> {
  let Latency = 5; let NumMicroOps = 7; let ReleaseAtCycles = [2, 3, 2];
}
def : InstRW<[ICXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;

def ICXWriteResGroup44c : SchedWriteRes<[ICXPort1, ICXPort06, ICXPort0156]> {
  let Latency = 6; let NumMicroOps = 7; let ReleaseAtCycles = [2, 3, 2];
}
def : InstRW<[ICXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;

def ICXWriteResGroup45 : SchedWriteRes<[ICXPort0, ICXPort49, ICXPort78]> {
  let Latency = 3; let NumMicroOps = 3; let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[ICXWriteResGroup45], (instrs FNSTSWm)>;

def ICXWriteResGroup47 : SchedWriteRes<[ICXPort49, ICXPort6, ICXPort78, ICXPort0156]> {
  let Latency = 3; let NumMicroOps = 4; let ReleaseAtCycles = [1, 1, 1, 1];
}
def : InstRW<[ICXWriteResGroup47], (instregex "CALL(16|32|64)r")>;

def ICXWriteResGroup48 : SchedWriteRes<[ICXPort49, ICXPort78, ICXPort06, ICXPort0156]> {
  let Latency = 3; let NumMicroOps = 4; let ReleaseAtCycles = [1, 1, 1, 1];
}
// Binds CALL64pcrel32 to ICXWriteResGroup48, whose definition directly
// precedes this record.
def : InstRW<[ICXWriteResGroup48], (instrs CALL64pcrel32)>;

def ICXWriteResGroup49 : SchedWriteRes<[ICXPort0]> {
  let Latency = 4; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;

def ICXWriteResGroup50 : SchedWriteRes<[ICXPort01]> {
  let Latency = 4; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup50], (instregex
    "VCVTPD2QQ(Z128|Z256)rr",
    "VCVTPD2UQQ(Z128|Z256)rr",
    "VCVTPS2DQ(Y|Z128|Z256)rr",
    "(V?)CVTPS2DQrr",
    "VCVTPS2UDQ(Z128|Z256)rr",
    "VCVTTPD2QQ(Z128|Z256)rr",
    "VCVTTPD2UQQ(Z128|Z256)rr",
    "VCVTTPS2DQ(Z128|Z256)rr",
    "(V?)CVTTPS2DQrr",
    "VCVTTPS2UDQ(Z128|Z256)rr")>;

// Same latency and micro-op count as ICXWriteResGroup50, but bound to
// ICXPort05 instead of ICXPort01; the opcodes listed are the Z (no size
// suffix) forms.
def ICXWriteResGroup50z : SchedWriteRes<[ICXPort05]> {
  let Latency = 4; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup50z], (instrs
    VCVTPD2QQZrr, VCVTPD2UQQZrr,
    VCVTPS2DQZrr, VCVTPS2UDQZrr,
    VCVTTPD2QQZrr, VCVTTPD2UQQZrr,
    VCVTTPS2DQZrr, VCVTTPS2UDQZrr)>;

def ICXWriteResGroup51 : SchedWriteRes<[ICXPort5]> {
  let Latency = 4; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def : InstRW<[ICXWriteResGroup51], (instregex
    "VEXPANDPD(Z|Z128|Z256)rr",
    "VEXPANDPS(Z|Z128|Z256)rr",
    "VPEXPANDD(Z|Z128|Z256)rr",
    "VPEXPANDQ(Z|Z128|Z256)rr",
    "VPMOVDB(Z|Z128|Z256)rr",
    "VPMOVDW(Z|Z128|Z256)rr",
    "VPMOVQB(Z|Z128|Z256)rr",
    "VPMOVQW(Z|Z128|Z256)rr",
    "VPMOVSDB(Z|Z128|Z256)rr",
    "VPMOVSDW(Z|Z128|Z256)rr",
    "VPMOVSQB(Z|Z128|Z256)rr",
    "VPMOVSQD(Z|Z128|Z256)rr",
    "VPMOVSQW(Z|Z128|Z256)rr",
    "VPMOVSWB(Z|Z128|Z256)rr",
    "VPMOVUSDB(Z|Z128|Z256)rr",
    "VPMOVUSDW(Z|Z128|Z256)rr",
    "VPMOVUSQB(Z|Z128|Z256)rr",
    "VPMOVUSQD(Z|Z128|Z256)rr",
    "VPMOVUSWB(Z|Z128|Z256)rr",
    "VPMOVWB(Z|Z128|Z256)rr")>;

def ICXWriteResGroup53 : SchedWriteRes<[ICXPort49, ICXPort5, ICXPort78]> {
  let Latency = 2; let NumMicroOps = 3; let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[ICXWriteResGroup53], (instregex "(V?)EXTRACTPS(Z?)mri")>;

def ICXWriteResGroup54 : SchedWriteRes<[ICXPort49, ICXPort5, ICXPort78]> {
  let Latency = 4; let NumMicroOps = 3; let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[ICXWriteResGroup54], (instregex
    "IST(T?)_FP(16|32|64)m",
    "IST_F(16|32)m",
    "VPMOVQD(Z|Z128|Z256)mr(b?)")>;

def ICXWriteResGroup55 : SchedWriteRes<[ICXPort0156]> {
  let Latency = 4; let NumMicroOps = 4; let ReleaseAtCycles = [4];
}
def : InstRW<[ICXWriteResGroup55], (instrs FNCLEX)>;

// VZEROUPPER: four micro-ops, zero latency, and an empty resource list.
def ICXWriteResGroup56 : SchedWriteRes<[]> {
  let Latency = 0; let NumMicroOps = 4; let ReleaseAtCycles = [];
}
def : InstRW<[ICXWriteResGroup56], (instrs VZEROUPPER)>;

def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1, ICXPort6, ICXPort0156]> {
  let Latency = 4; let NumMicroOps = 4; let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[ICXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;

def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5, ICXPort01]> {
  let Latency = 5; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup61], (instregex
    "MMX_CVT(T?)PD2PIrr",
    "MMX_CVT(T?)PS2PIrr",
    "VCVTDQ2PDZ128rr",
    "VCVTPD2DQZ128rr",
    "(V?)CVT(T?)PD2DQrr",
    "VCVTPD2UDQZ128rr",
    "VCVTPS2PDZ128rr",
    "(V?)CVTPS2PDrr",
    "VCVTPS2QQZ128rr",
    "VCVTPS2UQQZ128rr",
    "VCVTQQ2PSZ128rr",
    "(V?)CVTSI(64)?2SDrr",
    "VCVTSI2SSZrr",
    "(V?)CVTSI2SSrr",
    "VCVTSI(64)?2SDZrr",
    "VCVTSS2SDZrr",
    "(V?)CVTSS2SDrr",
    "VCVTTPD2DQZ128rr",
    "VCVTTPD2UDQZ128rr",
    "VCVTTPS2QQZ128rr",
    "VCVTTPS2UQQZ128rr",
    "VCVTUDQ2PDZ128rr",
    "VCVTUQQ2PSZ128rr",
    "VCVTUSI2SSZrr",
    "VCVTUSI(64)?2SDZrr")>;

// Five-cycle-latency groups. ReleaseAtCycles has one entry per resource named
// in the SchedWriteRes template argument.

def ICXWriteResGroup62 : SchedWriteRes<[ICXPort5, ICXPort015]> {
  let Latency = 5; let NumMicroOps = 3; let ReleaseAtCycles = [2, 1];
}
def : InstRW<[ICXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>;

def ICXWriteResGroup63 : SchedWriteRes<[ICXPort1, ICXPort6, ICXPort06]> {
  let Latency = 5; let NumMicroOps = 3; let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[ICXWriteResGroup63], (instregex "STR(16|32|64)r")>;

def ICXWriteResGroup65 : SchedWriteRes<[ICXPort49, ICXPort78, ICXPort01]> {
  let Latency = 5; let NumMicroOps = 3; let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[ICXWriteResGroup65], (instregex
    "VCVTPS2PHZ128mr(b?)",
    "VCVTPS2PHZ256mr(b?)",
    "VCVTPS2PHZmr(b?)")>;

def ICXWriteResGroup66 : SchedWriteRes<[ICXPort49, ICXPort5, ICXPort78]> {
  let Latency = 5; let NumMicroOps = 4; let ReleaseAtCycles = [1, 2, 1];
}
def : InstRW<[ICXWriteResGroup66], (instregex
    "VPMOVDB(Z|Z128|Z256)mr(b?)",
    "VPMOVDW(Z|Z128|Z256)mr(b?)",
    "VPMOVQB(Z|Z128|Z256)mr(b?)",
    "VPMOVQW(Z|Z128|Z256)mr(b?)",
    "VPMOVSDB(Z|Z128|Z256)mr(b?)",
    "VPMOVSDW(Z|Z128|Z256)mr(b?)",
    "VPMOVSQB(Z|Z128|Z256)mr(b?)",
    "VPMOVSQD(Z|Z128|Z256)mr(b?)",
    "VPMOVSQW(Z|Z128|Z256)mr(b?)",
    "VPMOVSWB(Z|Z128|Z256)mr(b?)",
    "VPMOVUSDB(Z|Z128|Z256)mr(b?)",
    "VPMOVUSDW(Z|Z128|Z256)mr(b?)",
    "VPMOVUSQB(Z|Z128|Z256)mr(b?)",
    "VPMOVUSQD(Z|Z128|Z256)mr(b?)",
    "VPMOVUSQW(Z|Z128|Z256)mr(b?)",
    "VPMOVUSWB(Z|Z128|Z256)mr(b?)",
    "VPMOVWB(Z|Z128|Z256)mr(b?)")>;

def ICXWriteResGroup67 : SchedWriteRes<[ICXPort06, ICXPort0156]> {
  let Latency = 5; let NumMicroOps = 5; let ReleaseAtCycles = [1, 4];
}
def : InstRW<[ICXWriteResGroup67], (instrs XSETBV)>;

def ICXWriteResGroup69 : SchedWriteRes<[ICXPort49, ICXPort78, ICXPort0156]> {
  let Latency = 5; let NumMicroOps = 6; let ReleaseAtCycles = [1, 1, 4];
}
// Binds PUSHF to ICXWriteResGroup69, whose definition directly precedes this
// record.
def : InstRW<[ICXWriteResGroup69], (instregex "PUSHF(16|64)")>;

// In the groups below, ReleaseAtCycles has one entry per resource named in the
// SchedWriteRes template argument.

def ICXWriteResGroup71 : SchedWriteRes<[ICXPort23]> {
  let Latency = 6; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup71], (instrs
    VBROADCASTSSrm,
    VPBROADCASTDrm, VPBROADCASTQrm,
    VMOVSHDUPrm, VMOVSLDUPrm, VMOVDDUPrm,
    MOVSHDUPrm, MOVSLDUPrm, MOVDDUPrm)>;

def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def : InstRW<[ICXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>;
def : InstRW<[ICXWriteResGroup72], (instregex
    "VCOMPRESSPD(Z|Z128|Z256)rr",
    "VCOMPRESSPS(Z|Z128|Z256)rr",
    "VPCOMPRESSD(Z|Z128|Z256)rr",
    "VPCOMPRESSQ(Z|Z128|Z256)rr",
    "VPERMW(Z|Z128|Z256)rr")>;

def ICXWriteResGroup73 : SchedWriteRes<[ICXPort0, ICXPort23]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup73], (instrs
    MMX_PADDSBrm, MMX_PADDSWrm,
    MMX_PADDUSBrm, MMX_PADDUSWrm,
    MMX_PAVGBrm, MMX_PAVGWrm,
    MMX_PCMPEQBrm, MMX_PCMPEQDrm, MMX_PCMPEQWrm,
    MMX_PCMPGTBrm, MMX_PCMPGTDrm, MMX_PCMPGTWrm,
    MMX_PMAXSWrm, MMX_PMAXUBrm,
    MMX_PMINSWrm, MMX_PMINUBrm,
    MMX_PSUBSBrm, MMX_PSUBSWrm,
    MMX_PSUBUSBrm, MMX_PSUBUSWrm)>;

def ICXWriteResGroup76 : SchedWriteRes<[ICXPort6, ICXPort23]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup76], (instrs FARJMP64m)>;
def : InstRW<[ICXWriteResGroup76], (instregex "JMP(16|32|64)m")>;

def ICXWriteResGroup79 : SchedWriteRes<[ICXPort23, ICXPort15]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup79], (instregex
    "ANDN(32|64)rm",
    "MOVBE(16|32|64)rm")>;

def ICXWriteResGroup80 : SchedWriteRes<[ICXPort23, ICXPort015]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
def : InstRW<[ICXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;

def ICXWriteResGroup81 : SchedWriteRes<[ICXPort23, ICXPort0156]> {
  let Latency = 6; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>;
def : InstRW<[ICXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;

def ICXWriteResGroup82 : SchedWriteRes<[ICXPort5, ICXPort01]> {
  let Latency = 6; let NumMicroOps = 3; let ReleaseAtCycles = [2, 1];
}
def : InstRW<[ICXWriteResGroup82], (instregex
    "(V?)CVTSI642SSrr",
    "VCVTSI642SSZrr",
    "VCVTUSI642SSZrr")>;

def ICXWriteResGroup84 : SchedWriteRes<[ICXPort1, ICXPort6, ICXPort06, ICXPort0156]> {
  let Latency = 6; let NumMicroOps = 4; let ReleaseAtCycles = [1, 1, 1, 1];
}
def : InstRW<[ICXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;

def ICXWriteResGroup86 : SchedWriteRes<[ICXPort49, ICXPort23, ICXPort78, ICXPort06]> {
  let Latency = 6; let NumMicroOps = 4; let ReleaseAtCycles = [1, 1, 1, 1];
}
def : InstRW<[ICXWriteResGroup86], (instregex
    "SAR(8|16|32|64)m(1|i)",
    "SHL(8|16|32|64)m(1|i)",
    "SHR(8|16|32|64)m(1|i)")>;

def ICXWriteResGroup87 : SchedWriteRes<[ICXPort49, ICXPort23, ICXPort78, ICXPort0156]> {
  let Latency = 6; let NumMicroOps = 4; let ReleaseAtCycles = [1, 1, 1, 1];
}
def : InstRW<[ICXWriteResGroup87], (instregex
    "POP(16|32|64)rmm",
    "PUSH(16|32|64)rmm")>;

def ICXWriteResGroup88 : SchedWriteRes<[ICXPort6, ICXPort0156]> {
  let Latency = 6; let NumMicroOps = 6; let ReleaseAtCycles = [1, 5];
}
def : InstRW<[ICXWriteResGroup88], (instrs STD)>;

def ICXWriteResGroup89 : SchedWriteRes<[ICXPort23]> {
  let Latency = 7; let NumMicroOps = 1; let ReleaseAtCycles = [1];
}
def : InstRW<[ICXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
def : InstRW<[ICXWriteResGroup89], (instrs
    VBROADCASTF128rm, VBROADCASTI128rm,
    VBROADCASTSDYrm, VBROADCASTSSYrm,
    VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm,
    VPBROADCASTDYrm, VPBROADCASTQYrm)>;

def ICXWriteResGroup90 : SchedWriteRes<[ICXPort01, ICXPort5]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;

def ICXWriteResGroup92 : SchedWriteRes<[ICXPort5, ICXPort23]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup92], (instregex
    "VMOV(SD|SS)Zrm(b?)",
    "VPBROADCAST(B|W)(Z128)?rm",
    "(V?)MOV(H|L)(PD|PS)rm",
    "(V?)INSERTPS(Z?)rm",
    "(V?)PALIGNR(Z128)?rmi",
    "(V?)PERMIL(PD|PS)(Z128)?m(b?)i",
    "(V?)PERMIL(PD|PS)(Z128)?rm",
    "(V?)UNPCK(L|H)(PD|PS)(Z128)?rm")>;

def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5, ICXPort01]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup93], (instregex
    "VCVTDQ2PDZ256rr",
    "VCVTPD2DQ(Y|Z256)rr",
    "VCVTPD2UDQZ256rr",
    "VCVTPS2PD(Y|Z256)rr",
    "VCVTPS2QQZ256rr",
    "VCVTPS2UQQZ256rr",
    "VCVTQQ2PSZ256rr",
    "VCVTTPD2DQ(Y|Z256)rr",
    "VCVTTPD2UDQZ256rr",
    "VCVTTPS2QQZ256rr",
    "VCVTTPS2UQQZ256rr",
    "VCVTUDQ2PDZ256rr",
    "VCVTUQQ2PSZ256rr")>;

// Same latency and micro-op count as ICXWriteResGroup93, with ICXPort05 in
// place of ICXPort01; the opcodes listed are the Z (no size suffix) forms.
def ICXWriteResGroup93z : SchedWriteRes<[ICXPort5, ICXPort05]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup93z], (instrs
    VCVTDQ2PDZrr,
    VCVTPD2DQZrr, VCVTPD2UDQZrr,
    VCVTPS2PDZrr,
    VCVTPS2QQZrr, VCVTPS2UQQZrr,
    VCVTQQ2PSZrr,
    VCVTTPD2DQZrr, VCVTTPD2UDQZrr,
    VCVTTPS2QQZrr, VCVTTPS2UQQZrr,
    VCVTUDQ2PDZrr,
    VCVTUQQ2PSZrr)>;

def ICXWriteResGroup95 : SchedWriteRes<[ICXPort23, ICXPort015]> {
  let Latency = 7; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup95], (instrs VPBLENDDrmi)>;
def : InstRW<[ICXWriteResGroup95, ReadAfterVecXLd], (instregex
    "VBLENDMPDZ128rm(b?)",
    "VBLENDMPSZ128rm(b?)",
    "VBROADCASTI32X2Z128rm(b?)",
    "VBROADCASTSSZ128rm(b?)",
    "VINSERT(F|I)128rm",
    "VMOVAPDZ128rm(b?)",
    "VMOVAPSZ128rm(b?)",
    "VMOVDDUPZ128rm(b?)",
    "VMOVDQA32Z128rm(b?)",
    "VMOVDQA64Z128rm(b?)",
    "VMOVDQU16Z128rm(b?)",
    "VMOVDQU32Z128rm(b?)",
    "VMOVDQU64Z128rm(b?)",
    "VMOVDQU8Z128rm(b?)",
    "VMOVSHDUPZ128rm(b?)",
    "VMOVSLDUPZ128rm(b?)",
    "VMOVUPDZ128rm(b?)",
    "VMOVUPSZ128rm(b?)",
    "VPADD(B|D|Q|W)Z128rm(b?)",
    "(V?)PADD(B|D|Q|W)rm",
    "VPBLENDM(B|D|Q|W)Z128rm(b?)",
    "VPBROADCASTDZ128rm(b?)",
    "VPBROADCASTQZ128rm(b?)",
    "VPSUB(B|D|Q|W)Z128rm(b?)",
    "(V?)PSUB(B|D|Q|W)rm",
    "VPTERNLOGDZ128rm(b?)i",
    "VPTERNLOGQZ128rm(b?)i")>;

def ICXWriteResGroup96 : SchedWriteRes<[ICXPort5, ICXPort23]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [2, 1];
}
def : InstRW<[ICXWriteResGroup96], (instrs MMX_PACKSSDWrm, MMX_PACKSSWBrm, MMX_PACKUSWBrm)>;

def ICXWriteResGroup97 : SchedWriteRes<[ICXPort5, ICXPort015]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [2, 1];
}
def : InstRW<[ICXWriteResGroup97], (instregex
    "VPERMI2WZ128rr",
    "VPERMI2WZ256rr",
    "VPERMI2WZrr",
    "VPERMT2WZ128rr",
    "VPERMT2WZ256rr",
    "VPERMT2WZrr")>;

def ICXWriteResGroup99 : SchedWriteRes<[ICXPort23, ICXPort0156]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1, 2];
}
def : InstRW<[ICXWriteResGroup99], (instrs
    LEAVE, LEAVE64,
    SCASB, SCASL, SCASQ, SCASW)>;

def ICXWriteResGroup100 : SchedWriteRes<[ICXPort0, ICXPort5, ICXPort01]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[ICXWriteResGroup100], (instregex
    "(V?)CVT(T?)SS2SI64(Z?)rr",
    "VCVT(T?)SS2USI64Zrr")>;

def ICXWriteResGroup101 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort05]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[ICXWriteResGroup101], (instrs FLDCW16m)>;

def ICXWriteResGroup103 : SchedWriteRes<[ICXPort5, ICXPort23, ICXPort0156]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[ICXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>;

def ICXWriteResGroup104 : SchedWriteRes<[ICXPort6, ICXPort23, ICXPort0156]> {
  let Latency = 7; let NumMicroOps = 3; let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[ICXWriteResGroup104], (instrs LRET64, RET64)>;

def ICXWriteResGroup106 : SchedWriteRes<[ICXPort49, ICXPort5, ICXPort78]> {
  let Latency = 7; let NumMicroOps = 4; let ReleaseAtCycles = [1, 2, 1];
}
def : InstRW<[ICXWriteResGroup106], (instregex
    "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
    "VCOMPRESSPS(Z|Z128|Z256)mr(b?)",
    "VPCOMPRESSD(Z|Z128|Z256)mr(b?)",
    "VPCOMPRESSQ(Z|Z128|Z256)mr(b?)")>;

def ICXWriteResGroup107 : SchedWriteRes<[ICXPort49, ICXPort23, ICXPort78, ICXPort06]> {
  let Latency = 7; let NumMicroOps = 5; let ReleaseAtCycles = [1, 1, 1, 2];
}
def : InstRW<[ICXWriteResGroup107], (instregex
    "ROL(8|16|32|64)m(1|i)",
    "ROR(8|16|32|64)m(1|i)")>;

def ICXWriteResGroup107_1 : SchedWriteRes<[ICXPort06]> {
  let Latency = 2; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def : InstRW<[ICXWriteResGroup107_1], (instrs
    ROL8r1, ROL16r1, ROL32r1, ROL64r1,
    ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;

def ICXWriteResGroup108 : SchedWriteRes<[ICXPort49, ICXPort23, ICXPort78, ICXPort0156]> {
  let Latency = 7; let NumMicroOps = 5; let ReleaseAtCycles = [1, 1, 1, 2];
}
def : InstRW<[ICXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>;

def ICXWriteResGroup109 : SchedWriteRes<[ICXPort49, ICXPort6, ICXPort23, ICXPort78, ICXPort0156]> {
  let Latency = 7; let NumMicroOps = 5; let ReleaseAtCycles = [1, 1, 1, 1, 1];
}
def : InstRW<[ICXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
def : InstRW<[ICXWriteResGroup109], (instrs FARCALL64m)>;

def ICXWriteResGroup110 : SchedWriteRes<[ICXPort0, ICXPort49, ICXPort78, ICXPort0156]> {
  let Latency = 7; let NumMicroOps = 7; let ReleaseAtCycles = [1, 2, 2, 2];
}
def : InstRW<[ICXWriteResGroup110], (instrs
    VPSCATTERDQZ128mr, VPSCATTERQQZ128mr,
    VSCATTERDPDZ128mr, VSCATTERQPDZ128mr)>;

def ICXWriteResGroup111 : SchedWriteRes<[ICXPort6, ICXPort06, ICXPort15, ICXPort0156]> {
  let Latency = 7; let NumMicroOps = 7; let ReleaseAtCycles = [1, 3, 1, 2];
}
def : InstRW<[ICXWriteResGroup111], (instrs LOOP)>;

def ICXWriteResGroup112 : SchedWriteRes<[ICXPort0, ICXPort49, ICXPort78, ICXPort0156]> {
  let Latency = 7; let NumMicroOps = 11; let ReleaseAtCycles = [1, 4, 4, 2];
}
def : InstRW<[ICXWriteResGroup112], (instrs
    VPSCATTERDQZ256mr, VPSCATTERQQZ256mr,
    VSCATTERDPDZ256mr, VSCATTERQPDZ256mr)>;

def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0, ICXPort49, ICXPort78, ICXPort0156]> {
  let Latency = 7; let NumMicroOps = 19; let ReleaseAtCycles = [1, 8, 8, 2];
}
def : InstRW<[ICXWriteResGroup113], (instrs
    VPSCATTERDQZmr, VPSCATTERQDZmr, VPSCATTERQQZmr,
    VSCATTERDPDZmr, VSCATTERQPSZmr, VSCATTERQPDZmr)>;

def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0, ICXPort49, ICXPort5, ICXPort78, ICXPort0156]> {
  let Latency = 7; let NumMicroOps = 36; let ReleaseAtCycles = [1, 16, 1, 16, 2];
}
def : InstRW<[ICXWriteResGroup114], (instrs VSCATTERDPSZmr)>;

def ICXWriteResGroup118 : SchedWriteRes<[ICXPort1, ICXPort23]> {
  let Latency = 8; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup118], (instregex
    "PDEP(32|64)rm",
    "PEXT(32|64)rm")>;

def ICXWriteResGroup119 : SchedWriteRes<[ICXPort5, ICXPort23]> {
  let Latency = 8; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup119], (instregex
    "FCOM(P?)(32|64)m",
    "VPBROADCASTB(Z|Z256)rm(b?)",
    "VPBROADCASTW(Z|Z256)rm(b?)",
    "(V?)PALIGNR(Y|Z256)rmi",
    "(V?)PERMIL(PD|PS)(Y|Z256)m(b?)i",
    "(V?)PERMIL(PD|PS)(Y|Z256)rm",
    "(V?)UNPCK(L|H)(PD|PS)(Y|Z256)rm")>;
def : InstRW<[ICXWriteResGroup119], (instrs
    VPBROADCASTBYrm, VPBROADCASTWYrm,
    VPMOVSXBDYrm, VPMOVSXBQYrm, VPMOVSXWQYrm)>;

def ICXWriteResGroup121 : SchedWriteRes<[ICXPort23, ICXPort015]> {
  let Latency = 8; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup121], (instrs VPBLENDDYrmi)>;
def : InstRW<[ICXWriteResGroup121, ReadAfterVecYLd], (instregex
    "VBLENDMPD(Z|Z256)rm(b?)",
    "VBLENDMPS(Z|Z256)rm(b?)",
    "VBROADCASTF32X2Z256rm(b?)",
    "VBROADCASTF32X2Zrm(b?)",
    "VBROADCASTF32X4Z256rm(b?)",
    "VBROADCASTF32X4Zrm(b?)",
    "VBROADCASTF32X8Zrm(b?)",
    "VBROADCASTF64X2Z256rm(b?)",
    "VBROADCASTF64X2Zrm(b?)",
    "VBROADCASTF64X4Zrm(b?)",
    "VBROADCASTI32X2Z256rm(b?)",
    "VBROADCASTI32X2Zrm(b?)",
    "VBROADCASTI32X4Z256rm(b?)",
    "VBROADCASTI32X4Zrm(b?)",
    "VBROADCASTI32X8Zrm(b?)",
    "VBROADCASTI64X2Z256rm(b?)",
    "VBROADCASTI64X2Zrm(b?)",
    "VBROADCASTI64X4Zrm(b?)",
    "VBROADCASTSD(Z|Z256)rm(b?)",
    "VBROADCASTSS(Z|Z256)rm(b?)",
    "VINSERTF32X4(Z|Z256)rm(b?)",
    "VINSERTF32X8Zrm(b?)",
    "VINSERTF64X2(Z|Z256)rm(b?)",
    "VINSERTF64X4Zrm(b?)",
    "VINSERTI32X4(Z|Z256)rm(b?)",
    "VINSERTI32X8Zrm(b?)",
    "VINSERTI64X2(Z|Z256)rm(b?)",
    "VINSERTI64X4Zrm(b?)",
    "VMOVAPD(Z|Z256)rm(b?)",
    "VMOVAPS(Z|Z256)rm(b?)",
    "VMOVDDUP(Z|Z256)rm(b?)",
    "VMOVDQA32(Z|Z256)rm(b?)",
    "VMOVDQA64(Z|Z256)rm(b?)",
    "VMOVDQU16(Z|Z256)rm(b?)",
    "VMOVDQU32(Z|Z256)rm(b?)",
    "VMOVDQU64(Z|Z256)rm(b?)",
    "VMOVDQU8(Z|Z256)rm(b?)",
    "VMOVSHDUP(Z|Z256)rm(b?)",
    "VMOVSLDUP(Z|Z256)rm(b?)",
    "VMOVUPD(Z|Z256)rm(b?)",
    "VMOVUPS(Z|Z256)rm(b?)",
    "VPADD(B|D|Q|W)Yrm",
    "VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
    "VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
    "VPBROADCASTD(Z|Z256)rm(b?)",
    "VPBROADCASTQ(Z|Z256)rm(b?)",
    "VPSUB(B|D|Q|W)Yrm",
    "VPSUB(B|D|Q|W)(Z|Z256)rm(b?)",
    "VPTERNLOGD(Z|Z256)rm(b?)i",
    "VPTERNLOGQ(Z|Z256)rm(b?)i")>;

def ICXWriteResGroup123 : SchedWriteRes<[ICXPort0, ICXPort5, ICXPort23]> {
  let Latency = 8; let NumMicroOps = 4; let ReleaseAtCycles = [1, 2, 1];
}
def : InstRW<[ICXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;

def ICXWriteResGroup127 : SchedWriteRes<[ICXPort23, ICXPort78, ICXPort06, ICXPort0156]> {
  let Latency = 8; let NumMicroOps = 5; let ReleaseAtCycles = [1, 1, 1, 2];
}
def : InstRW<[ICXWriteResGroup127], (instregex
    "RCL(8|16|32|64)m(1|i)",
    "RCR(8|16|32|64)m(1|i)")>;

def ICXWriteResGroup128 : SchedWriteRes<[ICXPort49, ICXPort23, ICXPort78, ICXPort06]> {
  let Latency = 8; let NumMicroOps = 6; let ReleaseAtCycles = [1, 1, 1, 3];
}
def : InstRW<[ICXWriteResGroup128], (instregex
    "ROL(8|16|32|64)mCL",
    "ROR(8|16|32|64)mCL",
    "SAR(8|16|32|64)mCL",
    "SHL(8|16|32|64)mCL",
    "SHR(8|16|32|64)mCL")>;

// Unlike its neighbours, this group is attached through a SchedAlias on
// WriteADCRMW rather than through an InstRW opcode list.
def ICXWriteResGroup130 : SchedWriteRes<[ICXPort49, ICXPort23, ICXPort78, ICXPort06, ICXPort0156]> {
  let Latency = 8; let NumMicroOps = 6; let ReleaseAtCycles = [1, 1, 1, 2, 1];
}
def : SchedAlias<WriteADCRMW, ICXWriteResGroup130>;

def ICXWriteResGroup131 : SchedWriteRes<[ICXPort0, ICXPort49, ICXPort5, ICXPort78, ICXPort0156]> {
  let Latency = 8; let NumMicroOps = 8; let ReleaseAtCycles = [1, 2, 1, 2, 2];
}
def : InstRW<[ICXWriteResGroup131], (instrs
    VPSCATTERQDZ128mr, VPSCATTERQDZ256mr,
    VSCATTERQPSZ128mr, VSCATTERQPSZ256mr)>;

def ICXWriteResGroup132 : SchedWriteRes<[ICXPort0, ICXPort49, ICXPort5, ICXPort78, ICXPort0156]> {
  let Latency = 8; let NumMicroOps = 12; let ReleaseAtCycles = [1, 4, 1, 4, 2];
}
def : InstRW<[ICXWriteResGroup132], (instrs VPSCATTERDDZ128mr, VSCATTERDPSZ128mr)>;

def ICXWriteResGroup133 : SchedWriteRes<[ICXPort0, ICXPort49, ICXPort5, ICXPort78, ICXPort0156]> {
  let Latency = 8; let NumMicroOps = 20; let ReleaseAtCycles = [1, 8, 1, 8, 2];
}
def : InstRW<[ICXWriteResGroup133], (instrs VPSCATTERDDZ256mr, VSCATTERDPSZ256mr)>;

def ICXWriteResGroup134 : SchedWriteRes<[ICXPort0, ICXPort49, ICXPort5, ICXPort78, ICXPort0156]> {
  let Latency = 8; let NumMicroOps = 36; let ReleaseAtCycles = [1, 16, 1, 16, 2];
}
def : InstRW<[ICXWriteResGroup134], (instrs VPSCATTERDDZmr)>;

def ICXWriteResGroup135 : SchedWriteRes<[ICXPort0, ICXPort23]> {
  let Latency = 9; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>;

def ICXWriteResGroup136 : SchedWriteRes<[ICXPort5, ICXPort23]> {
  let Latency = 9; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup136], (instrs
    VPMOVSXBWYrm, VPMOVSXDQYrm,
    VPMOVSXWDYrm, VPMOVZXWDYrm)>;
def : InstRW<[ICXWriteResGroup136], (instregex
    "VALIGN(D|Q)Z128rm(b?)i",
    "VFPCLASSSDZm(b?)i",
    "VFPCLASSSSZm(b?)i",
    "(V?)PCMPGTQrm",
    "VPERMI2DZ128rm(b?)",
    "VPERMI2PDZ128rm(b?)",
    "VPERMI2PSZ128rm(b?)",
    "VPERMI2QZ128rm(b?)",
    "VPERMT2DZ128rm(b?)",
    "VPERMT2PDZ128rm(b?)",
    "VPERMT2PSZ128rm(b?)",
    "VPERMT2QZ128rm(b?)",
    "VPMAXSQZ128rm(b?)",
    "VPMAXUQZ128rm(b?)",
    "VPMINSQZ128rm(b?)",
    "VPMINUQZ128rm(b?)")>;

def ICXWriteResGroup136_2 : SchedWriteRes<[ICXPort5, ICXPort23]> {
  let Latency = 10; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup136_2], (instregex
    "VCMP(PD|PS)Z128rm(b?)i",
    "VCMP(SD|SS)Zrm",
    "VFPCLASSPDZ128m(b?)i",
    "VFPCLASSPSZ128m(b?)i",
    "VPCMPBZ128rm(b?)i",
    "VPCMPDZ128rm(b?)i",
    "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
    "VPCMPGT(B|D|Q|W)Z128rm(b?)",
    "VPCMPQZ128rm(b?)i",
    "VPCMPU(B|D|Q|W)Z128rm(b?)i",
    "VPCMPWZ128rm(b?)i",
    "(V?)PACK(U|S)S(DW|WB)(Z128)?rm",
    "VPTESTMBZ128rm(b?)",
    "VPTESTMDZ128rm(b?)",
    "VPTESTMQZ128rm(b?)",
    "VPTESTMWZ128rm(b?)",
    "VPTESTNMBZ128rm(b?)",
    "VPTESTNMDZ128rm(b?)",
    "VPTESTNMQZ128rm(b?)",
    "VPTESTNMWZ128rm(b?)")>;

def ICXWriteResGroup137 : SchedWriteRes<[ICXPort23, ICXPort01]> {
  let Latency = 9; let NumMicroOps = 2; let ReleaseAtCycles = [1, 1];
}
def : InstRW<[ICXWriteResGroup137], (instregex
    "MMX_CVT(T?)PS2PIrm",
    "(V?)CVTPS2PDrm")>;

def ICXWriteResGroup143 : SchedWriteRes<[ICXPort15, ICXPort01, ICXPort23]> {
  let Latency = 9; let NumMicroOps = 4; let ReleaseAtCycles = [2, 1, 1];
}
def : InstRW<[ICXWriteResGroup143], (instrs
    PHADDSWrm, VPHADDSWrm,
    PHSUBSWrm, VPHSUBSWrm)>;

def ICXWriteResGroup146 : SchedWriteRes<[ICXPort1, ICXPort6, ICXPort23, ICXPort0156]> {
  let Latency = 9; let NumMicroOps = 5; let ReleaseAtCycles = [1, 2, 1, 1];
}
def : InstRW<[ICXWriteResGroup146], (instregex
    "LAR(16|32|64)rm",
    "LSL(16|32|64)rm")>;

def ICXWriteResGroup148 : SchedWriteRes<[ICXPort5,ICXPort23]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ReleaseAtCycles =
[1,1]; 1761} 1762def: InstRW<[ICXWriteResGroup148], (instrs VPCMPGTQYrm)>; 1763def: InstRW<[ICXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m", 1764 "ILD_F(16|32|64)m", 1765 "VALIGND(Z|Z256)rm(b?)i", 1766 "VALIGNQ(Z|Z256)rm(b?)i", 1767 "VPMAXSQ(Z|Z256)rm(b?)", 1768 "VPMAXUQ(Z|Z256)rm(b?)", 1769 "VPMINSQ(Z|Z256)rm(b?)", 1770 "VPMINUQ(Z|Z256)rm(b?)")>; 1771 1772def ICXWriteResGroup148_2 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1773 let Latency = 11; 1774 let NumMicroOps = 2; 1775 let ReleaseAtCycles = [1,1]; 1776} 1777def: InstRW<[ICXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i", 1778 "VCMPPS(Z|Z256)rm(b?)i", 1779 "VFPCLASSPD(Z|Z256)m(b?)i", 1780 "VFPCLASSPS(Z|Z256)m(b?)i", 1781 "VPCMPB(Z|Z256)rm(b?)i", 1782 "VPCMPD(Z|Z256)rm(b?)i", 1783 "VPCMPEQB(Z|Z256)rm(b?)", 1784 "VPCMPEQD(Z|Z256)rm(b?)", 1785 "VPCMPEQQ(Z|Z256)rm(b?)", 1786 "VPCMPEQW(Z|Z256)rm(b?)", 1787 "VPCMPGTB(Z|Z256)rm(b?)", 1788 "VPCMPGTD(Z|Z256)rm(b?)", 1789 "VPCMPGTQ(Z|Z256)rm(b?)", 1790 "VPCMPGTW(Z|Z256)rm(b?)", 1791 "VPCMPQ(Z|Z256)rm(b?)i", 1792 "VPCMPU(B|D|Q|W)Z256rm(b?)i", 1793 "VPCMPU(B|D|Q|W)Zrm(b?)i", 1794 "VPCMPW(Z|Z256)rm(b?)i", 1795 "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z256)rm", 1796 "VPTESTM(B|D|Q|W)Z256rm(b?)", 1797 "VPTESTM(B|D|Q|W)Zrm(b?)", 1798 "VPTESTNM(B|D|Q|W)Z256rm(b?)", 1799 "VPTESTNM(B|D|Q|W)Zrm(b?)")>; 1800 1801def ICXWriteResGroup149 : SchedWriteRes<[ICXPort23,ICXPort01]> { 1802 let Latency = 10; 1803 let NumMicroOps = 2; 1804 let ReleaseAtCycles = [1,1]; 1805} 1806def: InstRW<[ICXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)", 1807 "VCVTDQ2PSZ128rm(b?)", 1808 "(V?)CVTDQ2PSrm", 1809 "VCVTPD2QQZ128rm(b?)", 1810 "VCVTPD2UQQZ128rm(b?)", 1811 "VCVTPH2PSZ128rm(b?)", 1812 "VCVTPS2DQZ128rm(b?)", 1813 "(V?)CVTPS2DQrm", 1814 "VCVTPS2PDZ128rm(b?)", 1815 "VCVTPS2QQZ128rm(b?)", 1816 "VCVTPS2UDQZ128rm(b?)", 1817 "VCVTPS2UQQZ128rm(b?)", 1818 "VCVTQQ2PDZ128rm(b?)", 1819 "VCVTQQ2PSZ128rm(b?)", 1820 "VCVTSS2SDZrm", 1821 "(V?)CVTSS2SDrm", 1822 "VCVTTPD2QQZ128rm(b?)", 1823 
"VCVTTPD2UQQZ128rm(b?)", 1824 "VCVTTPS2DQZ128rm(b?)", 1825 "(V?)CVTTPS2DQrm", 1826 "VCVTTPS2QQZ128rm(b?)", 1827 "VCVTTPS2UDQZ128rm(b?)", 1828 "VCVTTPS2UQQZ128rm(b?)", 1829 "VCVTUDQ2PDZ128rm(b?)", 1830 "VCVTUDQ2PSZ128rm(b?)", 1831 "VCVTUQQ2PDZ128rm(b?)", 1832 "VCVTUQQ2PSZ128rm(b?)")>; 1833 1834def ICXWriteResGroup151 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1835 let Latency = 10; 1836 let NumMicroOps = 3; 1837 let ReleaseAtCycles = [2,1]; 1838} 1839def: InstRW<[ICXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)", 1840 "VEXPANDPSZ128rm(b?)", 1841 "VPEXPANDDZ128rm(b?)", 1842 "VPEXPANDQZ128rm(b?)")>; 1843 1844def ICXWriteResGroup154 : SchedWriteRes<[ICXPort15,ICXPort01,ICXPort23]> { 1845 let Latency = 10; 1846 let NumMicroOps = 4; 1847 let ReleaseAtCycles = [2,1,1]; 1848} 1849def: InstRW<[ICXWriteResGroup154], (instrs VPHADDSWYrm, 1850 VPHSUBSWYrm)>; 1851 1852def ICXWriteResGroup157 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> { 1853 let Latency = 10; 1854 let NumMicroOps = 8; 1855 let ReleaseAtCycles = [1,1,1,1,1,3]; 1856} 1857def: InstRW<[ICXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>; 1858 1859def ICXWriteResGroup160 : SchedWriteRes<[ICXPort0,ICXPort23]> { 1860 let Latency = 11; 1861 let NumMicroOps = 2; 1862 let ReleaseAtCycles = [1,1]; 1863} 1864def: InstRW<[ICXWriteResGroup160], (instregex "MUL_F(32|64)m")>; 1865 1866def ICXWriteResGroup161 : SchedWriteRes<[ICXPort23,ICXPort01]> { 1867 let Latency = 11; 1868 let NumMicroOps = 2; 1869 let ReleaseAtCycles = [1,1]; 1870} 1871def: InstRW<[ICXWriteResGroup161], (instrs VCVTDQ2PSYrm, 1872 VCVTPS2PDYrm)>; 1873def: InstRW<[ICXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)", 1874 "VCVTPH2PS(Z|Z256)rm(b?)", 1875 "VCVTPS2PD(Z|Z256)rm(b?)", 1876 "VCVTQQ2PD(Z|Z256)rm(b?)", 1877 "VCVTQQ2PSZ256rm(b?)", 1878 "VCVT(T?)PD2QQ(Z|Z256)rm(b?)", 1879 "VCVT(T?)PD2UQQ(Z|Z256)rm(b?)", 1880 "VCVT(T?)PS2DQYrm", 1881 "VCVT(T?)PS2DQ(Z|Z256)rm(b?)", 1882 "VCVT(T?)PS2QQZ256rm(b?)", 1883 
"VCVT(T?)PS2UDQ(Z|Z256)rm(b?)", 1884 "VCVT(T?)PS2UQQZ256rm(b?)", 1885 "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)", 1886 "VCVTUQQ2PD(Z|Z256)rm(b?)", 1887 "VCVTUQQ2PSZ256rm(b?)")>; 1888 1889def ICXWriteResGroup162 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1890 let Latency = 11; 1891 let NumMicroOps = 3; 1892 let ReleaseAtCycles = [2,1]; 1893} 1894def: InstRW<[ICXWriteResGroup162], (instregex "FICOM(P?)(16|32)m", 1895 "VEXPANDPD(Z|Z256)rm(b?)", 1896 "VEXPANDPS(Z|Z256)rm(b?)", 1897 "VPEXPANDD(Z|Z256)rm(b?)", 1898 "VPEXPANDQ(Z|Z256)rm(b?)")>; 1899 1900def ICXWriteResGroup164 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { 1901 let Latency = 11; 1902 let NumMicroOps = 3; 1903 let ReleaseAtCycles = [1,1,1]; 1904} 1905def: InstRW<[ICXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>; 1906 1907def ICXWriteResGroup166 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> { 1908 let Latency = 11; 1909 let NumMicroOps = 3; 1910 let ReleaseAtCycles = [1,1,1]; 1911} 1912def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2DQrm, 1913 CVTTPD2DQrm, 1914 MMX_CVTPD2PIrm, 1915 MMX_CVTTPD2PIrm)>; 1916 1917def ICXWriteResGroup167 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { 1918 let Latency = 11; 1919 let NumMicroOps = 4; 1920 let ReleaseAtCycles = [2,1,1]; 1921} 1922def: InstRW<[ICXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>; 1923 1924def ICXWriteResGroup169 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> { 1925 let Latency = 11; 1926 let NumMicroOps = 7; 1927 let ReleaseAtCycles = [2,3,2]; 1928} 1929def: InstRW<[ICXWriteResGroup169], (instregex "RCL(16|32|64)rCL", 1930 "RCR(16|32|64)rCL")>; 1931 1932def ICXWriteResGroup170 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> { 1933 let Latency = 11; 1934 let NumMicroOps = 9; 1935 let ReleaseAtCycles = [1,5,1,2]; 1936} 1937def: InstRW<[ICXWriteResGroup170], (instrs RCL8rCL)>; 1938 1939def ICXWriteResGroup171 : SchedWriteRes<[ICXPort06,ICXPort0156]> { 1940 let Latency = 11; 1941 let NumMicroOps = 11; 1942 let ReleaseAtCycles = [2,9]; 
1943} 1944def: InstRW<[ICXWriteResGroup171], (instrs LOOPE, LOOPNE)>; 1945 1946def ICXWriteResGroup174 : SchedWriteRes<[ICXPort01]> { 1947 let Latency = 15; 1948 let NumMicroOps = 3; 1949 let ReleaseAtCycles = [3]; 1950} 1951def: InstRW<[ICXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>; 1952 1953def ICXWriteResGroup174z : SchedWriteRes<[ICXPort0]> { 1954 let Latency = 15; 1955 let NumMicroOps = 3; 1956 let ReleaseAtCycles = [3]; 1957} 1958def: InstRW<[ICXWriteResGroup174z], (instregex "VPMULLQZrr")>; 1959 1960def ICXWriteResGroup175 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1961 let Latency = 12; 1962 let NumMicroOps = 3; 1963 let ReleaseAtCycles = [2,1]; 1964} 1965def: InstRW<[ICXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>; 1966 1967def ICXWriteResGroup176 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort01]> { 1968 let Latency = 12; 1969 let NumMicroOps = 3; 1970 let ReleaseAtCycles = [1,1,1]; 1971} 1972def: InstRW<[ICXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)", 1973 "VCVT(T?)SS2USI64Zrm(b?)")>; 1974 1975def ICXWriteResGroup177 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> { 1976 let Latency = 12; 1977 let NumMicroOps = 3; 1978 let ReleaseAtCycles = [1,1,1]; 1979} 1980def: InstRW<[ICXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)", 1981 "VCVT(T?)PS2UQQZrm(b?)")>; 1982 1983def ICXWriteResGroup180 : SchedWriteRes<[ICXPort5,ICXPort23]> { 1984 let Latency = 13; 1985 let NumMicroOps = 3; 1986 let ReleaseAtCycles = [2,1]; 1987} 1988def: InstRW<[ICXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m", 1989 "VPERMWZ256rm(b?)", 1990 "VPERMWZrm(b?)")>; 1991 1992def ICXWriteResGroup181 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { 1993 let Latency = 13; 1994 let NumMicroOps = 3; 1995 let ReleaseAtCycles = [1,1,1]; 1996} 1997def: InstRW<[ICXWriteResGroup181], (instrs VCVTDQ2PDYrm)>; 1998 1999def ICXWriteResGroup183 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { 2000 let Latency = 13; 2001 let NumMicroOps = 4; 2002 let ReleaseAtCycles = 
[2,1,1]; 2003} 2004def: InstRW<[ICXWriteResGroup183], (instregex "VPERMI2WZ128rm(b?)", 2005 "VPERMT2WZ128rm(b?)")>; 2006 2007def ICXWriteResGroup187 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { 2008 let Latency = 14; 2009 let NumMicroOps = 3; 2010 let ReleaseAtCycles = [1,1,1]; 2011} 2012def: InstRW<[ICXWriteResGroup187], (instregex "MUL_FI(16|32)m")>; 2013 2014def ICXWriteResGroup188 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> { 2015 let Latency = 14; 2016 let NumMicroOps = 3; 2017 let ReleaseAtCycles = [1,1,1]; 2018} 2019def: InstRW<[ICXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)", 2020 "VCVTPD2UDQZrm(b?)", 2021 "VCVTQQ2PSZrm(b?)", 2022 "VCVTTPD2DQZrm(b?)", 2023 "VCVTTPD2UDQZrm(b?)", 2024 "VCVTUQQ2PSZrm(b?)")>; 2025 2026def ICXWriteResGroup189 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { 2027 let Latency = 14; 2028 let NumMicroOps = 4; 2029 let ReleaseAtCycles = [2,1,1]; 2030} 2031def: InstRW<[ICXWriteResGroup189], (instregex "VPERMI2WZ256rm(b?)", 2032 "VPERMI2WZrm(b?)", 2033 "VPERMT2WZ256rm(b?)", 2034 "VPERMT2WZrm(b?)")>; 2035 2036def ICXWriteResGroup190 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> { 2037 let Latency = 14; 2038 let NumMicroOps = 10; 2039 let ReleaseAtCycles = [2,4,1,3]; 2040} 2041def: InstRW<[ICXWriteResGroup190], (instrs RCR8rCL)>; 2042 2043def ICXWriteResGroup191 : SchedWriteRes<[ICXPort0]> { 2044 let Latency = 15; 2045 let NumMicroOps = 1; 2046 let ReleaseAtCycles = [1]; 2047} 2048def: InstRW<[ICXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>; 2049 2050def ICXWriteResGroup194 : SchedWriteRes<[ICXPort1,ICXPort5,ICXPort01,ICXPort23,ICXPort015]> { 2051 let Latency = 15; 2052 let NumMicroOps = 8; 2053 let ReleaseAtCycles = [1,2,2,1,2]; 2054} 2055def: InstRW<[ICXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>; 2056 2057def ICXWriteResGroup195 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> { 2058 let Latency = 15; 2059 let NumMicroOps = 10; 2060 let 
ReleaseAtCycles = [1,1,1,5,1,1]; 2061} 2062def: InstRW<[ICXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>; 2063 2064def ICXWriteResGroup199 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> { 2065 let Latency = 16; 2066 let NumMicroOps = 14; 2067 let ReleaseAtCycles = [1,1,1,4,2,5]; 2068} 2069def: InstRW<[ICXWriteResGroup199], (instrs CMPXCHG8B)>; 2070 2071def ICXWriteResGroup200 : SchedWriteRes<[ICXPort1, ICXPort05, ICXPort6]> { 2072 let Latency = 12; 2073 let NumMicroOps = 34; 2074 let ReleaseAtCycles = [1, 4, 5]; 2075} 2076def: InstRW<[ICXWriteResGroup200], (instrs VZEROALL)>; 2077 2078def ICXWriteResGroup202 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156]> { 2079 let Latency = 17; 2080 let NumMicroOps = 15; 2081 let ReleaseAtCycles = [2,1,2,4,2,4]; 2082} 2083def: InstRW<[ICXWriteResGroup202], (instrs XCH_F)>; 2084 2085def ICXWriteResGroup205 : SchedWriteRes<[ICXPort23,ICXPort01]> { 2086 let Latency = 21; 2087 let NumMicroOps = 4; 2088 let ReleaseAtCycles = [1,3]; 2089} 2090def: InstRW<[ICXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>; 2091 2092def ICXWriteResGroup207 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort06,ICXPort0156]> { 2093 let Latency = 18; 2094 let NumMicroOps = 8; 2095 let ReleaseAtCycles = [1,1,1,5]; 2096} 2097def: InstRW<[ICXWriteResGroup207], (instrs CPUID, RDTSC)>; 2098 2099def ICXWriteResGroup208 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> { 2100 let Latency = 18; 2101 let NumMicroOps = 11; 2102 let ReleaseAtCycles = [2,1,1,4,1,2]; 2103} 2104def: InstRW<[ICXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>; 2105 2106def ICXWriteResGroup211 : SchedWriteRes<[ICXPort23,ICXPort01]> { 2107 let Latency = 22; 2108 let NumMicroOps = 4; 2109 let ReleaseAtCycles = [1,3]; 2110} 2111def: InstRW<[ICXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>; 2112 2113def ICXWriteResGroup211_1 : SchedWriteRes<[ICXPort23,ICXPort0]> { 2114 let Latency = 22; 
2115 let NumMicroOps = 4; 2116 let ReleaseAtCycles = [1,3]; 2117} 2118def: InstRW<[ICXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>; 2119 2120def ICXWriteResGroup215 : SchedWriteRes<[ICXPort0]> { 2121 let Latency = 20; 2122 let NumMicroOps = 1; 2123 let ReleaseAtCycles = [1]; 2124} 2125def: InstRW<[ICXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>; 2126 2127def ICXWriteGatherEVEX2 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> { 2128 let Latency = 17; 2129 let NumMicroOps = 5; // 2 uops perform multiple loads 2130 let ReleaseAtCycles = [1,2,1,1]; 2131} 2132def: InstRW<[ICXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm, 2133 VGATHERDPDZ128rm, VPGATHERDQZ128rm, 2134 VGATHERQPDZ128rm, VPGATHERQQZ128rm)>; 2135 2136def ICXWriteGatherEVEX4 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> { 2137 let Latency = 19; 2138 let NumMicroOps = 5; // 2 uops perform multiple loads 2139 let ReleaseAtCycles = [1,4,1,1]; 2140} 2141def: InstRW<[ICXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm, 2142 VGATHERQPDZ256rm, VPGATHERQQZ256rm, 2143 VGATHERDPSZ128rm, VPGATHERDDZ128rm, 2144 VGATHERDPDZ256rm, VPGATHERDQZ256rm)>; 2145 2146def ICXWriteGatherEVEX8 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> { 2147 let Latency = 21; 2148 let NumMicroOps = 5; // 2 uops perform multiple loads 2149 let ReleaseAtCycles = [1,8,1,1]; 2150} 2151def: InstRW<[ICXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm, 2152 VGATHERDPDZrm, VPGATHERDQZrm, 2153 VGATHERQPDZrm, VPGATHERQQZrm, 2154 VGATHERQPSZrm, VPGATHERQDZrm)>; 2155 2156def ICXWriteGatherEVEX16 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> { 2157 let Latency = 25; 2158 let NumMicroOps = 5; // 2 uops perform multiple loads 2159 let ReleaseAtCycles = [1,16,1,1]; 2160} 2161def: InstRW<[ICXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>; 2162 2163def ICXWriteResGroup219 : 
SchedWriteRes<[ICXPort49, ICXPort5, ICXPort6, ICXPort23, ICXPort78, ICXPort06, ICXPort0156]> {
  let Latency = 20;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [1, 1, 1, 1, 1, 1, 2];
}
def: InstRW<[ICXWriteResGroup219], (instrs INSB, INSL, INSW)>;

def ICXWriteResGroup220 : SchedWriteRes<[ICXPort5, ICXPort6, ICXPort0156]> {
  let Latency = 20;
  let NumMicroOps = 10;
  let ReleaseAtCycles = [1, 2, 7];
}
def: InstRW<[ICXWriteResGroup220], (instrs MWAITrr)>;

def ICXWriteResGroup223 : SchedWriteRes<[ICXPort0, ICXPort23]> {
  let Latency = 22;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [1, 1];
}
def: InstRW<[ICXWriteResGroup223], (instregex "DIV_F(32|64)m")>;

// The three *VEX* gather groups below share the same port list and uop count;
// they differ only in latency (18/20/22) and in how long ICXPort23 is held
// (2/4/8 cycles).
def ICXWriteResGroupVEX2 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
  let Latency = 18;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1, 2, 1, 1];
}
def: InstRW<[ICXWriteResGroupVEX2], (instrs
    VGATHERDPDrm, VPGATHERDQrm,
    VGATHERQPDrm, VPGATHERQQrm,
    VGATHERQPSrm, VPGATHERQDrm)>;

def ICXWriteResGroupVEX4 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
  let Latency = 20;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1, 4, 1, 1];
}
def: InstRW<[ICXWriteResGroupVEX4], (instrs
    VGATHERDPDYrm, VPGATHERDQYrm,
    VGATHERDPSrm,  VPGATHERDDrm,
    VGATHERQPDYrm, VPGATHERQQYrm,
    VGATHERQPSYrm, VPGATHERQDYrm)>;

def ICXWriteResGroupVEX8 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
  let Latency = 22;
  let NumMicroOps = 5; // 2 uops perform multiple loads
  let ReleaseAtCycles = [1, 8, 1, 1];
}
def: InstRW<[ICXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;

def ICXWriteResGroup225 : SchedWriteRes<[ICXPort5, ICXPort01, ICXPort015]> {
  let Latency = 22;
  let NumMicroOps = 14;
  let ReleaseAtCycles = [5, 5, 4];
}
def:
InstRW<[ICXWriteResGroup225], (instregex "VPCONFLICTDZ128rr", 2216 "VPCONFLICTQZ256rr")>; 2217 2218def ICXWriteResGroup228 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> { 2219 let Latency = 23; 2220 let NumMicroOps = 19; 2221 let ReleaseAtCycles = [2,1,4,1,1,4,6]; 2222} 2223def: InstRW<[ICXWriteResGroup228], (instrs CMPXCHG16B)>; 2224 2225def ICXWriteResGroup233 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { 2226 let Latency = 25; 2227 let NumMicroOps = 3; 2228 let ReleaseAtCycles = [1,1,1]; 2229} 2230def: InstRW<[ICXWriteResGroup233], (instregex "DIV_FI(16|32)m")>; 2231 2232def ICXWriteResGroup239 : SchedWriteRes<[ICXPort0,ICXPort23]> { 2233 let Latency = 27; 2234 let NumMicroOps = 2; 2235 let ReleaseAtCycles = [1,1]; 2236} 2237def: InstRW<[ICXWriteResGroup239], (instregex "DIVR_F(32|64)m")>; 2238 2239def ICXWriteResGroup242 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> { 2240 let Latency = 29; 2241 let NumMicroOps = 15; 2242 let ReleaseAtCycles = [5,5,1,4]; 2243} 2244def: InstRW<[ICXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>; 2245 2246def ICXWriteResGroup243 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { 2247 let Latency = 30; 2248 let NumMicroOps = 3; 2249 let ReleaseAtCycles = [1,1,1]; 2250} 2251def: InstRW<[ICXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>; 2252 2253def ICXWriteResGroup247 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort06,ICXPort0156]> { 2254 let Latency = 35; 2255 let NumMicroOps = 23; 2256 let ReleaseAtCycles = [1,5,3,4,10]; 2257} 2258def: InstRW<[ICXWriteResGroup247], (instregex "IN(8|16|32)ri", 2259 "IN(8|16|32)rr")>; 2260 2261def ICXWriteResGroup248 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> { 2262 let Latency = 35; 2263 let NumMicroOps = 23; 2264 let ReleaseAtCycles = [1,5,2,1,4,10]; 2265} 2266def: InstRW<[ICXWriteResGroup248], (instregex "OUT(8|16|32)ir", 2267 "OUT(8|16|32)rr")>; 2268 2269def ICXWriteResGroup249 : 
SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> { 2270 let Latency = 37; 2271 let NumMicroOps = 21; 2272 let ReleaseAtCycles = [9,7,5]; 2273} 2274def: InstRW<[ICXWriteResGroup249], (instregex "VPCONFLICTDZ256rr", 2275 "VPCONFLICTQZrr")>; 2276 2277def ICXWriteResGroup250 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> { 2278 let Latency = 37; 2279 let NumMicroOps = 31; 2280 let ReleaseAtCycles = [1,8,1,21]; 2281} 2282def: InstRW<[ICXWriteResGroup250], (instregex "XRSTOR(64)?")>; 2283 2284def ICXWriteResGroup252 : SchedWriteRes<[ICXPort1,ICXPort49,ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort15,ICXPort0156]> { 2285 let Latency = 40; 2286 let NumMicroOps = 18; 2287 let ReleaseAtCycles = [1,1,2,3,1,1,1,8]; 2288} 2289def: InstRW<[ICXWriteResGroup252], (instrs VMCLEARm)>; 2290 2291def ICXWriteResGroup253 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> { 2292 let Latency = 41; 2293 let NumMicroOps = 39; 2294 let ReleaseAtCycles = [1,10,1,1,26]; 2295} 2296def: InstRW<[ICXWriteResGroup253], (instrs XSAVE64)>; 2297 2298def ICXWriteResGroup254 : SchedWriteRes<[ICXPort5,ICXPort0156]> { 2299 let Latency = 42; 2300 let NumMicroOps = 22; 2301 let ReleaseAtCycles = [2,20]; 2302} 2303def: InstRW<[ICXWriteResGroup254], (instrs RDTSCP)>; 2304 2305def ICXWriteResGroup255 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> { 2306 let Latency = 42; 2307 let NumMicroOps = 40; 2308 let ReleaseAtCycles = [1,11,1,1,26]; 2309} 2310def: InstRW<[ICXWriteResGroup255], (instrs XSAVE)>; 2311def: InstRW<[ICXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>; 2312 2313def ICXWriteResGroup256 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> { 2314 let Latency = 44; 2315 let NumMicroOps = 22; 2316 let ReleaseAtCycles = [9,7,1,5]; 2317} 2318def: InstRW<[ICXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)", 2319 "VPCONFLICTQZrm(b?)")>; 2320 2321def ICXWriteResGroup258 : 
SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05,ICXPort06,ICXPort0156]> { 2322 let Latency = 62; 2323 let NumMicroOps = 64; 2324 let ReleaseAtCycles = [2,8,5,10,39]; 2325} 2326def: InstRW<[ICXWriteResGroup258], (instrs FLDENVm)>; 2327 2328def ICXWriteResGroup259 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> { 2329 let Latency = 63; 2330 let NumMicroOps = 88; 2331 let ReleaseAtCycles = [4,4,31,1,2,1,45]; 2332} 2333def: InstRW<[ICXWriteResGroup259], (instrs FXRSTOR64)>; 2334 2335def ICXWriteResGroup260 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> { 2336 let Latency = 63; 2337 let NumMicroOps = 90; 2338 let ReleaseAtCycles = [4,2,33,1,2,1,47]; 2339} 2340def: InstRW<[ICXWriteResGroup260], (instrs FXRSTOR)>; 2341 2342def ICXWriteResGroup261 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> { 2343 let Latency = 67; 2344 let NumMicroOps = 35; 2345 let ReleaseAtCycles = [17,11,7]; 2346} 2347def: InstRW<[ICXWriteResGroup261], (instregex "VPCONFLICTDZrr")>; 2348 2349def ICXWriteResGroup262 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> { 2350 let Latency = 74; 2351 let NumMicroOps = 36; 2352 let ReleaseAtCycles = [17,11,1,7]; 2353} 2354def: InstRW<[ICXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>; 2355 2356def ICXWriteResGroup263 : SchedWriteRes<[ICXPort5,ICXPort05,ICXPort0156]> { 2357 let Latency = 75; 2358 let NumMicroOps = 15; 2359 let ReleaseAtCycles = [6,3,6]; 2360} 2361def: InstRW<[ICXWriteResGroup263], (instrs FNINIT)>; 2362 2363def ICXWriteResGroup266 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort49,ICXPort5,ICXPort6,ICXPort78,ICXPort06,ICXPort0156]> { 2364 let Latency = 106; 2365 let NumMicroOps = 100; 2366 let ReleaseAtCycles = [9,1,11,16,1,11,21,30]; 2367} 2368def: InstRW<[ICXWriteResGroup266], (instrs FSTENVm)>; 2369 2370def ICXWriteResGroup267 : SchedWriteRes<[ICXPort6,ICXPort0156]> { 2371 let Latency = 140; 2372 let NumMicroOps = 4; 2373 let ReleaseAtCycles = [1,3]; 
}
def: InstRW<[ICXWriteResGroup267], (instrs PAUSE)>;

def: InstRW<[WriteZero], (instrs CLC)>;


// Instruction variants handled by the renamer. These might not need execution
// ports in certain conditions.
// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
// section "Skylake Pipeline" > "Register allocation and renaming".
// These can be investigated with llvm-exegesis, e.g.
// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-

// Write that names no processor resource and has zero latency. Every variant
// below selects it when ZeroIdiomPredicate matches.
def ICXWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// Each SchedWriteVariant that follows has the same two-way shape: the
// zero-latency write when ZeroIdiomPredicate holds, otherwise the write named
// in the NoSchedPred entry.

// GPR SUB/XOR: fallback is WriteALU.
def ICXWriteZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteALU]>
]>;
def : InstRW<[ICXWriteZeroIdiom], (instrs
    SUB32rr, SUB64rr,
    XOR32rr, XOR64rr)>;

// FP XOR: fallback is WriteFLogic / WriteFLogicY / WriteFLogicZ.
def ICXWriteFZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteFLogic]>
]>;
def : InstRW<[ICXWriteFZeroIdiom], (instrs
    XORPSrr, VXORPSrr,
    XORPDrr, VXORPDrr,
    VXORPSZ128rr,
    VXORPDZ128rr)>;

def ICXWriteFZeroIdiomY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteFLogicY]>
]>;
def : InstRW<[ICXWriteFZeroIdiomY], (instrs
    VXORPSYrr, VXORPDYrr,
    VXORPSZ256rr, VXORPDZ256rr)>;

def ICXWriteFZeroIdiomZ : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteFLogicZ]>
]>;
def : InstRW<[ICXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>;

// Integer vector XOR: fallback is WriteVecLogicX / WriteVecLogicY /
// WriteVecLogicZ.
def ICXWriteVZeroIdiomLogicX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecLogicX]>
]>;
def : InstRW<[ICXWriteVZeroIdiomLogicX], (instrs
    PXORrr, VPXORrr,
    VPXORDZ128rr, VPXORQZ128rr)>;

def ICXWriteVZeroIdiomLogicY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecLogicY]>
]>;
def : InstRW<[ICXWriteVZeroIdiomLogicY], (instrs
    VPXORYrr,
    VPXORDZ256rr, VPXORQZ256rr)>;

def ICXWriteVZeroIdiomLogicZ : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecLogicZ]>
]>;
def : InstRW<[ICXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>;

// PCMPGT{B,D,W}: fallback is WriteVecALUX / WriteVecALUY.
def ICXWriteVZeroIdiomALUX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecALUX]>
]>;
def : InstRW<[ICXWriteVZeroIdiomALUX], (instrs
    PCMPGTBrr, VPCMPGTBrr,
    PCMPGTDrr, VPCMPGTDrr,
    PCMPGTWrr, VPCMPGTWrr)>;

def ICXWriteVZeroIdiomALUY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecALUY]>
]>;
def : InstRW<[ICXWriteVZeroIdiomALUY], (instrs
    VPCMPGTBYrr,
    VPCMPGTDYrr,
    VPCMPGTWYrr)>;

// Fallback write for the PSUB zero-idiom variant: one uop on ICXPort015 with
// latency 1.
def ICXWritePSUB : SchedWriteRes<[ICXPort015]> {
  let Latency = 1;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}

def ICXWriteVZeroIdiomPSUB : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [ICXWritePSUB]>
]>;

def : InstRW<[ICXWriteVZeroIdiomPSUB], (instrs
    PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
    PSUBDrr, VPSUBDrr, VPSUBDZ128rr,
    PSUBQrr, VPSUBQrr, VPSUBQZ128rr,
    PSUBWrr, VPSUBWrr, VPSUBWZ128rr,
    VPSUBBYrr, VPSUBBZ256rr,
    VPSUBDYrr, VPSUBDZ256rr,
    VPSUBQYrr, VPSUBQZ256rr,
    VPSUBWYrr, VPSUBWZ256rr,
    VPSUBBZrr,
    VPSUBDZrr,
    VPSUBQZrr,
    VPSUBWZrr)>;

// Fallback write for the PCMPGTQ zero-idiom variant: one uop on ICXPort5 with
// latency 3.
def ICXWritePCMPGTQ : SchedWriteRes<[ICXPort5]> {
  let Latency = 3;
  let NumMicroOps = 1;
  let ReleaseAtCycles = [1];
}

def ICXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
  SchedVar<NoSchedPred, [ICXWritePCMPGTQ]>
]>;
def : InstRW<[ICXWriteVZeroIdiomPCMPGTQ], (instrs
    PCMPGTQrr, VPCMPGTQrr,
    VPCMPGTQYrr)>;


// CMOVs that use both the Z and C flags require an extra uop.
// Register form: two uops on ICXPort06.
def ICXWriteCMOVA_CMOVBErr : SchedWriteRes<[ICXPort06]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}

// Memory-source form: one ICXPort23 uop plus the two ICXPort06 uops.
def ICXWriteCMOVA_CMOVBErm : SchedWriteRes<[ICXPort23, ICXPort06]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [1, 2];
}

// Use the writes above when the IsCMOVA*_Or_CMOVBE* predicate matches;
// otherwise fall back to WriteCMOV (or WriteCMOV.Folded for the rm form).
def ICXCMOVA_CMOVBErr : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [ICXWriteCMOVA_CMOVBErr]>,
  SchedVar<NoSchedPred, [WriteCMOV]>
]>;

def ICXCMOVA_CMOVBErm : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [ICXWriteCMOVA_CMOVBErm]>,
  SchedVar<NoSchedPred, [WriteCMOV.Folded]>
]>;

def : InstRW<[ICXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
def : InstRW<[ICXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;

// SETCCs that use both the Z and C flags require an extra uop.
// Register destination: two uops on ICXPort06.
def ICXWriteSETA_SETBEr : SchedWriteRes<[ICXPort06]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}

// Memory destination: one uop each on ICXPort49 and ICXPort78 plus the two
// ICXPort06 uops.
def ICXWriteSETA_SETBEm : SchedWriteRes<[ICXPort49, ICXPort78, ICXPort06]> {
  let Latency = 3;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [1, 1, 2];
}

// Use the writes above when the IsSETA*_Or_SETBE* predicate matches;
// otherwise fall back to WriteSETCC / WriteSETCCStore.
def ICXSETA_SETBErr : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [ICXWriteSETA_SETBEr]>,
  SchedVar<NoSchedPred, [WriteSETCC]>
]>;

def ICXSETA_SETBErm : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [ICXWriteSETA_SETBEm]>,
  SchedVar<NoSchedPred, [WriteSETCCStore]>
]>;

def : InstRW<[ICXSETA_SETBErr], (instrs SETCCr)>;
def : InstRW<[ICXSETA_SETBErm], (instrs SETCCm)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// Every class below is guarded by ZeroIdiomPredicate.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,

  // SSE Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr,

    // int variants.
    PXORrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX Zero-idioms.
  DepBreakingClass<[
    // xmm fp variants.
    VXORPSrr, VXORPDrr,

    // xmm int variants.
    VPXORrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

    // ymm variants.
    VXORPSYrr, VXORPDYrr, VPXORYrr,
    VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr,

    // AVX-512 variants (zmm, plus the Z128 and Z256 forms).
    VXORPSZrr, VXORPDZrr, VPXORDZrr, VPXORQZrr,
    VXORPSZ128rr, VXORPDZ128rr, VPXORDZ128rr, VPXORQZ128rr,
    VXORPSZ256rr, VXORPDZ256rr, VPXORDZ256rr, VPXORQZ256rr,
    VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
    VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
    VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
  ], ZeroIdiomPredicate>,
]>;

} // SchedModel