//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AMD bdver2 (Piledriver) to support
// instruction scheduling and other instruction cost heuristics.
// Based on:
//  * AMD Software Optimization Guide for AMD Family 15h Processors.
//    https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
//  * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
//    http://www.agner.org/optimize/microarchitecture.pdf
//  * https://www.realworldtech.com/bulldozer/
//    Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
//
//===----------------------------------------------------------------------===//

def BdVer2Model : SchedMachineModel {
  let IssueWidth = 4; // Up to 4 IPC can be decoded, issued, retired.
  let MicroOpBufferSize = 128; // RCU reorder buffer size, which is unconfirmed.
  let LoopMicroOpBufferSize = -1; // There does not seem to be a loop buffer.
  let LoadLatency = 4; // L1 data cache has a 4-cycle load-to-use latency.
  let HighLatency = 25; // FIXME: any better choice?
  let MispredictPenalty = 20; // Minimum branch misdirection penalty.

  let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.

  // FIXME: Incomplete. This flag is set to allow the scheduler to assign
  //        a default model to unrecognized opcodes.
  let CompleteModel = 0;
} // SchedMachineModel

let SchedModel = BdVer2Model in {


//===----------------------------------------------------------------------===//
// Pipes
//===----------------------------------------------------------------------===//

// There are a total of eight pipes.

//===----------------------------------------------------------------------===//
// Integer execution pipes
//

// Two EX (ALU) pipes.
def PdEX0  : ProcResource<1>; // ALU, Integer Pipe0
def PdEX1  : ProcResource<1>; // ALU, Integer Pipe1
def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>;

// Two AGLU pipes, identical.
def PdAGLU01 : ProcResource<2>; // AGU, Integer Pipe[23]

//===----------------------------------------------------------------------===//
// Floating point execution pipes
//

// Four FPU pipes.

def PdFPU0 : ProcResource<1>; // Vector/FPU Pipe0
def PdFPU1 : ProcResource<1>; // Vector/FPU Pipe1
def PdFPU2 : ProcResource<1>; // Vector/FPU Pipe2
def PdFPU3 : ProcResource<1>; // Vector/FPU Pipe3

// FPU grouping
def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>;
def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>;


//===----------------------------------------------------------------------===//
// RCU
//===----------------------------------------------------------------------===//

// The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle.
// On the other hand, the RCU reorder buffer size for Piledriver does not
// seem to be specified in any trustworthy source.
// But as per https://www.realworldtech.com/bulldozer/6/ the Bulldozer had
// RCU reorder buffer size of 128. So that is a good guess for now.
def PdRCU : RetireControlUnit<128, 4>;


//===----------------------------------------------------------------------===//
// Pipelines
//===----------------------------------------------------------------------===//

// There are a total of two pipelines, each one with its own scheduler.

//===----------------------------------------------------------------------===//
// Integer Pipeline Scheduling
//

// There is one Integer Scheduler per core.

// Integer physical register file has 96 registers of 64-bit.
def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>;

// Unified Integer, Memory Scheduler has 40 entries.
def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> {
  // Up to 4 IPC can be decoded, issued, retired.
  let BufferSize = 40;
}


//===----------------------------------------------------------------------===//
// FPU Pipeline Scheduling
//

// The FPU unit is shared between the two cores.

// FP physical register file has 160 registers of 128-bit.
// Operations on 256-bit data types are cracked into two COPs.
def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;

// Unified FP Scheduler has 64 entries.
def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> {
  // Up to 4 IPC can be decoded, issued, retired.
  let BufferSize = 64;
}


//===----------------------------------------------------------------------===//
// Functional units
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Load-Store Units
//

let Super = PdAGLU01 in
def PdLoad : ProcResource<2> {
  // For Piledriver, the load queue is 40 entries deep.
  let BufferSize = 40;
}

def PdLoadQueue : LoadQueue<PdLoad>;

let Super = PdAGLU01 in
def PdStore : ProcResource<1> {
  // For Piledriver, the store queue is 24 entries deep.
  let BufferSize = 24;
}

def PdStoreQueue : StoreQueue<PdStore>;

//===----------------------------------------------------------------------===//
// Integer Execution Units
//

def PdDiv    : ProcResource<1>; // PdEX0; unpipelined integer division
def PdCount  : ProcResource<1>; // PdEX0; POPCNT, LZCOUNT

def PdMul    : ProcResource<1>; // PdEX1; integer multiplication
def PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches

//===----------------------------------------------------------------------===//
// Floating-Point Units
//

// Two FMAC/FPFMA units.
def PdFPFMA  : ProcResource<2>; // PdFPU0, PdFPU1

// One 128-bit integer multiply-accumulate unit.
def PdFPMMA  : ProcResource<1>; // PdFPU0

// One fp conversion unit.
def PdFPCVT  : ProcResource<1>; // PdFPU0

// One unit for shuffles, packs, permutes, shifts.
def PdFPXBR  : ProcResource<1>; // PdFPU1

// Two 128-bit packed integer units.
def PdFPMAL  : ProcResource<2>; // PdFPU2, PdFPU3

// One FP store unit.
def PdFPSTO  : ProcResource<1>; // PdFPU3


//===----------------------------------------------------------------------===//
// Basic helper classes.
//===----------------------------------------------------------------------===//

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.

// Defines one anonymous WriteRes for SchedRW on ExePorts, forwarding Lat to
// Latency, Res to ReleaseAtCycles and UOps to NumMicroOps.
multiclass PdWriteRes<SchedWrite SchedRW,
                      list<ProcResourceKind> ExePorts, int Lat = 1,
                      list<int> Res = [], int UOps = 1> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ReleaseAtCycles = Res;
    let NumMicroOps = UOps;
  }
}

// Defines the register form of SchedRW and its folded-load form
// (SchedRW.Folded). The folded form is derived from the register form:
//  * PdLoad is prepended to ExePorts;
//  * the latency is Lat + LoadLat and the micro-op count is UOps + LoadUOps;
//  * the cycle list stays empty when Res is empty and LoadRes is 1;
//    otherwise it is LoadRes followed by Res, or by one '1' per entry of
//    ExePorts when Res is empty.
multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts, int Lat,
                            list<int> Res, int UOps,
                            int LoadLat, int LoadRes, int LoadUOps> {
  defm : PdWriteRes<SchedRW, ExePorts, Lat, Res, UOps>;

  defm : PdWriteRes<SchedRW.Folded,
                    !listconcat([PdLoad], ExePorts),
                    !add(Lat, LoadLat),
                    !if(!and(!empty(Res), !eq(LoadRes, 1)),
                      [],
                      !listconcat([LoadRes],
                        !if(!empty(Res),
                          !listsplat(1, !size(ExePorts)),
                          Res))),
                    !add(UOps, LoadUOps)>;
}

// Register/folded-load pair using LoadLat = 4 and LoadRes = 3.
multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts, int Lat = 1,
                            list<int> Res = [], int UOps = 1,
                            int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/4, /*LoadRes*/3, LoadUOps>;
}

// Register/folded-load pair using LoadLat = 5 and LoadRes = 3.
multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat = 1,
                             list<int> Res = [], int UOps = 1,
                             int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}

// Register/folded-load pair using LoadLat = 5 and LoadRes = 3; unlike the
// XMM variant, Lat has no default and UOps defaults to 2.
multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat,
                             list<int> Res = [], int UOps = 2,
                             int LoadUOps = 0> {
  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                          /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}

//===----------------------------------------------------------------------===//
// Here be dragons.
//===----------------------------------------------------------------------===//

// L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers
// needn't be available until 4 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 4>;

// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available
// until 5 cycles after the memory operand.
def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 5>;
def : ReadAdvance<ReadAfterVecYLd, 5>;

// Transfer from int domain to ivec domain incurs additional latency of 8..10cy
// Reference: Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller
// and Excavator pipeline", "Data delay between different execution domains"
def : ReadAdvance<ReadInt2Fpu, -10>;

// A folded store needs a cycle on the PdStore for the store data.
def : WriteRes<WriteRMW, [PdStore]>;

////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteLoad,    [PdLoad]> { let Latency = 5; let ReleaseAtCycles = [2]; }
def : WriteRes<WriteStore,   [PdStore]>;
def : WriteRes<WriteStoreNT, [PdStore]>;
def : WriteRes<WriteMove,    [PdEX01]> { let ReleaseAtCycles = [2]; }
defm : X86WriteResUnsupported<WriteVecMaskedGatherWriteback>;

// Load/store MXCSR.
// FIXME: These are copy and pasted from WriteLoad/Store.
def : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; }
def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; let ReleaseAtCycles = [18]; }

// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;

////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteZero, [/*No ExePorts*/]>;

////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteJump, [PdEX1, PdBranch]>;

////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteSystem,     [PdEX01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [PdEX01]> { let Latency = 100; }
def : WriteRes<WriteFence,      [PdStore]>;

def PdWriteXLAT : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
}
def : InstRW<[PdWriteXLAT], (instrs XLAT)>;

def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
  let Latency = 184;
  let ReleaseAtCycles = [375];
  let NumMicroOps = 45;
}
def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
                                        "LSL(16|32|64)rr")>;

// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [PdEX01]> { let ReleaseAtCycles = [2]; }

////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteALU, [PdEX01], 1, [2]>;

def PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> {
  let Latency = 6;
  let ReleaseAtCycles = [3, 2, 1];
  let NumMicroOps = 1;
}
def : SchedAlias<WriteALURMW, PdWriteALURMW>;

def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
  let ReleaseAtCycles = [88];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;

def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1],
             (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr,
                     BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr,
                     BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr,
                     BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
                     TZMSK32rr, TZMSK64rr)>;

def PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> {
  let Latency = 6;
  let ReleaseAtCycles = [3, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1m],
             (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm,
                     BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm,
                     BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm,
                     BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm,
                     TZMSK32rm, TZMSK64rm)>;

defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;

def PdWriteADCSBB64ri32 : SchedWriteRes<[PdEX01]> {
  let ReleaseAtCycles = [3];
}
def : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>;

defm : PdWriteRes<WriteBSWAP32,    [PdEX01]>;
defm : PdWriteRes<WriteBSWAP64,    [PdEX01]>;
defm : PdWriteRes<WriteCMPXCHG,    [PdEX1], 3, [3], 5>;
defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [44, 1, 1], 2>;
defm : PdWriteRes<WriteXCHG,       [PdEX1], 1, [], 2>;

def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ReleaseAtCycles = [3];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;

def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ReleaseAtCycles = [23];
  let NumMicroOps = 5;
}
def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;

def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ReleaseAtCycles = [21];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
             (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;

def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ReleaseAtCycles = [26];
  let NumMicroOps = 18;
}
def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;

def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
  let Latency = 3;
  let ReleaseAtCycles = [69];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;

def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
  let Latency = 6;
  let ReleaseAtCycles = [20];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;

defm : PdWriteResExPair<WriteIMul8,     [PdEX1, PdMul], 4, [1, 4]>;
defm : PdWriteResExPair<WriteIMul16,    [PdEX1, PdMul], 4, [1, 5], 2>;
defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [1, 5], 2>;
defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul32,    [PdEX1, PdMul], 4, [1, 4]>;
defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [1, 2], 1, 1>;
defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul64,    [PdEX1, PdMul], 6, [1, 6]>;
defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4], 1, 1>;
defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;

// BMI2 MULX
defm : X86WriteResUnsupported<WriteIMulH>;
defm : X86WriteResUnsupported<WriteIMulHLd>;
defm : X86WriteResPairUnsupported<WriteMULX32>;
defm : X86WriteResPairUnsupported<WriteMULX64>;

defm : PdWriteResExPair<WriteDiv8,   [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteDiv16,  [PdEX1, PdDiv], 15, [1, 15], 2>;
defm : PdWriteResExPair<WriteDiv32,  [PdEX1, PdDiv], 14, [1, 14], 2>;
defm : PdWriteResExPair<WriteDiv64,  [PdEX1, PdDiv], 14, [1, 14], 2>;

defm : PdWriteResExPair<WriteIDiv8,  [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteIDiv16, [PdEX1, PdDiv], 15, [1, 17], 2>;
defm : PdWriteResExPair<WriteIDiv32, [PdEX1, PdDiv], 14, [1, 25], 2>;
defm : PdWriteResExPair<WriteIDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;

defm : PdWriteResExPair<WriteCRC32, [PdEX01], 2, [4], 3>;

def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
  let Latency = 5;
  let ReleaseAtCycles = [10];
  let NumMicroOps = 5;
}
def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;

def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
  let Latency = 6;
  let ReleaseAtCycles = [12];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;

def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ReleaseAtCycles = [17];
  let NumMicroOps = 11;
}
def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;

defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.

def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> {
  let Latency = 5;
  let ReleaseAtCycles = [3, 3];
  let NumMicroOps = 2;
}

// Selects PdWriteCMOVm when immediate operand 7 is one of the listed
// condition codes, and falls back to WriteCMOV.Folded otherwise.
def PdWriteCMOVmVar : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_BE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_A">>,  [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_L">>,  [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_GE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_LE">>, [PdWriteCMOVm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_G">>,  [PdWriteCMOVm]>,
  SchedVar<NoSchedPred, [WriteCMOV.Folded]>
]>;
def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;

defm : PdWriteRes<WriteFCMOV, [PdFPU0, PdFPFMA]>; // x87 conditional move.

def : WriteRes<WriteSETCC,      [PdEX01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [PdEX01, PdStore]>;

def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}

// Selects PdWriteSETGEmSETGmSETLEmSETLm when immediate operand 5 is one of
// the listed condition codes, and falls back to WriteSETCCStore otherwise.
def PdSETGEmSETGmSETLEmSETLm : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_GE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_G">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_LE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_L">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
  SchedVar<NoSchedPred, [WriteSETCCStore]>
]>;
def : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>;

defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [4], 2>;

def PdWriteLAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLAHF], (instrs LAHF)>;

def PdWriteSAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteSAHF], (instrs SAHF)>;

defm : PdWriteRes<WriteBitTest,         [PdEX01],         1, [2],    1>;
defm : PdWriteRes<WriteBitTestImmLd,    [PdEX01, PdLoad], 5, [2, 3], 1>;
defm : PdWriteRes<WriteBitTestRegLd,    [PdEX01, PdLoad], 5, [7, 2], 7>;
defm : PdWriteRes<WriteBitTestSet,      [PdEX01],         2, [2],    2>;
defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1, 1], 4>;
defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;

def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ReleaseAtCycles = [42, 1];
  let NumMicroOps = 4;
}
def : SchedAlias<WriteBitTestSetImmRMW, PdWriteBTSIm>;
def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ReleaseAtCycles = [44, 1];
  let NumMicroOps = 10;
}
def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;

// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [PdEX01]> { let ReleaseAtCycles = [2]; }

// This write is used for slow LEA instructions.
def PdWrite3OpsLEA : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
}

// On Piledriver, a slow LEA is either a 3Ops LEA (base, index, offset),
// or an LEA with a `Scale` value different than 1.
def PdSlowLEAPredicate : MCSchedPredicate<
  CheckAny<[
    // A 3-operand LEA (base, index, offset).
    IsThreeOperandsLEAFn,
    // An LEA with a "Scale" different than 1.
    CheckAll<[
      CheckIsImmOperand<2>,
      CheckNot<CheckImmOperand<2, 1>>
    ]>
  ]>
>;

// Slow LEAs resolve to PdWrite3OpsLEA; everything else keeps the plain
// WriteLEA class.
def PdWriteLEA : SchedWriteVariant<[
  SchedVar<PdSlowLEAPredicate, [PdWrite3OpsLEA]>,
  SchedVar<NoSchedPred, [WriteLEA]>
]>;

def : InstRW<[PdWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;

// LEA16r is not routed through the variant above; it always uses this
// two-micro-op write.
def PdWriteLEA16r : SchedWriteRes<[PdEX01]> {
  let ReleaseAtCycles = [3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteLEA16r], (instrs LEA16r)>;

// Bit counts.
defm : PdWriteResExPair<WriteBSF, [PdEX01], 3, [6], 6, 2>;
defm : PdWriteResExPair<WriteBSR, [PdEX01], 4, [8], 7, 2>;
defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4, [4]>;
// LZCNT and TZCNT are restricted to PdEX0 rather than the PdEX01 group.
defm : PdWriteResExPair<WriteLZCNT, [PdEX0], 2, [2], 2>;
defm : PdWriteResExPair<WriteTZCNT, [PdEX0], 2, [2], 2>;

// BMI1 BEXTR, BMI2 BZHI
defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBLS, [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBZHI, [PdEX01]>;

// Immediate-form BEXTRI, register source.
def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>;

// Immediate-form BEXTRI, memory source.
def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [5];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>;

////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResExPair<WriteShift, [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteShiftCL, [PdEX01]>;
defm : PdWriteResExPair<WriteRotate, [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;

// Rotate-through-carry (RCL/RCR) register forms get per-instruction overrides.
// All of them execute on PdEX01 and differ only in latency, occupancy and
// micro-op count.
def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ReleaseAtCycles = [24];
  let NumMicroOps = 26;
}
def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;

def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ReleaseAtCycles = [23];
  let NumMicroOps = 23;
}
def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;

def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ReleaseAtCycles = [22];
  let NumMicroOps = 24;
}
def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;

def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ReleaseAtCycles = [20];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;

def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ReleaseAtCycles = [19];
  let NumMicroOps = 19;
}
def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;

def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ReleaseAtCycles = [14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>;

def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ReleaseAtCycles = [13];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>;

def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ReleaseAtCycles = [14];
  let NumMicroOps = 15;
}
def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;


def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 9;
  let ReleaseAtCycles = [18];
  let NumMicroOps = 20;
}
def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;

def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ReleaseAtCycles = [21];
  let NumMicroOps = 21;
}
def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;

def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
  let Latency = 8;
  let ReleaseAtCycles = [15];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;

def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 13;
  let ReleaseAtCycles = [25];
  let NumMicroOps = 25;
}
def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;

// SHLD/SHRD.
defm : PdWriteRes<WriteSHDrri, [PdEX01], 3, [6], 6>;
defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 3, [8], 7>;

// These three CL-count forms override the WriteSHDrrcl numbers above.
def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
  let Latency = 3;
  let ReleaseAtCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
                                                              SHLD32rrCL,
                                                              SHRD32rrCL)>;

defm : PdWriteRes<WriteSHDmri, [PdLoad, PdEX01], 4, [1, 22], 8>;
defm : PdWriteRes<WriteSHDmrcl, [PdLoad, PdEX01], 4, [1, 22], 8>;

////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////

// x87 constant loads.
defm : PdWriteRes<WriteFLD0, [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLD1, [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLDC, [PdFPU1, PdFPSTO], 3>;

// Floating point loads.
defm : PdWriteRes<WriteFLoad, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3], 2>;

defm : PdWriteRes<WriteFMaskedLoad, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 1, 4]>;
defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 2, 4], 2>;

// Floating point stores.
defm : PdWriteRes<WriteFStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [1, 36, 2], 4>;

// Store forms of (V)MOVHPS/(V)MOVHPD.
def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23, PdFPSTO]> {
  let Latency = 2;
  let ReleaseAtCycles = [1, 3, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;

// 256-bit unaligned FP stores: only the micro-op count is overridden here.
def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>;

// Non-temporal floating point stores.
defm : PdWriteRes<WriteFStoreNT, [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;

// Masked floating point stores.
defm : PdWriteRes<WriteFMaskedStore32, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
defm : PdWriteRes<WriteFMaskedStore64, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
defm : PdWriteRes<WriteFMaskedStore32Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;

// Floating point register moves. The *Z class is marked unsupported.
defm : PdWriteRes<WriteFMove, [PdFPU01, PdFPFMA]>;
defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
defm : X86WriteResUnsupported<WriteFMoveZ>;

defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;

defm : PdWriteResXMMPair<WriteFAdd, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAddX, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;

// x87 add/subtract with a memory operand.
def PdWriteX87Add: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 5;
  let ReleaseAtCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Add], (instrs ADD_FI16m, ADD_FI32m, ADD_F32m, ADD_F64m,
                                      SUB_FI16m, SUB_FI32m, SUB_F32m, SUB_F64m,
                                      SUBR_FI16m, SUBR_FI32m, SUBR_F32m, SUBR_F64m)>;

defm : PdWriteResXMMPair<WriteFAdd64, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAdd64X, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;

defm : PdWriteResXMMPair<WriteFCmp, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmpX, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;

defm : PdWriteResXMMPair<WriteFCmp64, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmp64X, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;

// The FCom classes additionally list the integer pipe PdEX0.
defm : PdWriteResXMMPair<WriteFCom, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResXMMPair<WriteFComX, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;

// x87 compares with a memory operand.
def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 6;
}
def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>;

def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>;
def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;

defm : PdWriteResXMMPair<WriteFMul, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMulX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;

// x87 multiply with a memory operand.
def PdWriteX87Mul: SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> {
  let Latency = 5;
  let ReleaseAtCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Mul], (instrs MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>;

defm : PdWriteResXMMPair<WriteFMul64, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMul64X, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;

defm : PdWriteResXMMPair<WriteFMA, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;

defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 10], 15, 2>;

defm : PdWriteResXMMPair<WriteDPPS, [PdFPU1, PdFPFMA], 25, [1, 14], 16, 2>;
defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 25], /*or 29*/ 25, 4>;

// VDPPSrri gets its own override of the WriteDPPS numbers above.
def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 27;
  let ReleaseAtCycles = [1, 14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;

defm : PdWriteResXMMPair<WriteFRcp, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFRcpX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRcpY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;

defm : PdWriteResXMMPair<WriteFRsqrt, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : PdWriteResXMMPair<WriteFRsqrtX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;

defm : PdWriteResXMMPair<WriteFDiv, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;

// x87 divide with a memory operand.
def PdWriteX87Div: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 9;
  let ReleaseAtCycles = [3, 1, 18];
}
def : InstRW<[PdWriteX87Div], (instrs DIV_FI16m, DIV_FI32m,
                                      DIVR_FI16m, DIVR_FI32m,
                                      DIV_F32m, DIV_F64m,
                                      DIVR_F32m, DIVR_F64m)>;

defm : PdWriteResXMMPair<WriteFDiv64, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;

defm : PdWriteResXMMPair<WriteFSqrt, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;

defm : PdWriteResXMMPair<WriteFSqrt64, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;

defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 18]>;
defm : PdWriteResXMMPair<WriteFSign, [PdFPU1, PdFPFMA], 1, [1, 4]>;

defm : PdWriteResXMMPair<WriteFRnd, [PdFPU1, PdFPSTO], 4, []>;
defm : PdWriteResYMMPair<WriteFRndY, [PdFPU1, PdFPSTO], 4, [2, 1], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;

// VFRCZ* overrides: packed, scalar, memory-source and 256-bit forms each get
// their own write on PdFPU1 + PdFPSTO.
def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ReleaseAtCycles = [2, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZP], (instrs VFRCZPDrr, VFRCZPSrr)>;

def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ReleaseAtCycles = [10, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZS], (instrs VFRCZSDrr, VFRCZSSrr)>;

def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ReleaseAtCycles = [2, 1];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
                                      VFRCZSDrm, VFRCZSSrm)>;

def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ReleaseAtCycles = [3, 1];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;

def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ReleaseAtCycles = [4, 1];
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;

defm : PdWriteResXMMPair<WriteFLogic, [PdFPU23, PdFPMAL], 2>;
defm : PdWriteResYMMPair<WriteFLogicY, [PdFPU23, PdFPMAL], 2, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;

defm : PdWriteResXMMPair<WriteFTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [4, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;

defm : PdWriteResXMMPair<WriteFShuffle, [PdFPU01, PdFPFMA], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;

def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 7;
  let ReleaseAtCycles = [1, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128rm)>;

defm : PdWriteResXMMPair<WriteFVarShuffle, [PdFPU1, PdFPXBR], 3>;
defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU1, PdFPXBR], 3, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;

defm : PdWriteResXMMPair<WriteFBlend, [PdFPU23, PdFPMAL], 2>;
defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU23, PdFPMAL], 2, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;

defm : PdWriteResXMMPair<WriteFVarBlend, [PdFPU1, PdFPXBR], 2>;
defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU1, PdFPXBR], 2, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;

defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [1, 3], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;

// 128-bit lane extract / permute overrides.
def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 2;
  let ReleaseAtCycles = [1, 2];
}
def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>;

def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 7;
  let ReleaseAtCycles = [1, 4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>;

def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 4;
  let ReleaseAtCycles = [1, 6];
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>;

def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 8; // 4 + 4
  let ReleaseAtCycles = [1, 8];
  let NumMicroOps = 10;
}
def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;

////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////

// Scalar float -> integer conversions also list PdFPFMA and the integer pipe
// PdEX0 among their resources.
defm : PdWriteResXMMPair<WriteCvtSS2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;

defm : PdWriteResXMMPair<WriteCvtSD2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

def PdWriteMMX_CVTTPD2PIrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTTPD2PIrr], (instrs MMX_CVTTPD2PIrr)>;

// FIXME: f+3 ST, LD+STC latency
defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;

defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

// Register-source scalar integer -> float conversions override the generic
// WriteCvtI2SS / WriteCvtI2SD numbers.
def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 13;
  let ReleaseAtCycles = [1, 3, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;

defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;

defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;

defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;

// MMX <-> packed FP conversion overrides.
def PdWriteMMX_CVTPD2PIrrMMX_CVTPI2PDrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTPD2PIrrMMX_CVTPI2PDrr], (instrs MMX_CVTPD2PIrr,
                                                            MMX_CVTPI2PDrr)>;

def PdWriteMMX_CVTPI2PSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTPI2PSrr], (instrs MMX_CVTPI2PSrr)>;

// Half-precision conversions.
defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2, 1>;
defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 3>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;

defm : PdWriteRes<WriteCvtPS2PH, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2>;
defm : PdWriteRes<WriteCvtPS2PHY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;

defm : PdWriteRes<WriteCvtPS2PHSt, [PdFPU0, PdFPCVT, PdFPSTO, PdStore], 4, [1, 2, 1, 1], 3>;
defm : PdWriteRes<WriteCvtPS2PHYSt, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdStore], 4, [1, 2, 1, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;

////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////

// Vector integer loads.
defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 3], 2>;

defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 4]>;
defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 4]>;

defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 1, 2]>;
defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 2, 4], 2>;

// Vector integer stores.
defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [2, 36, 2], 4>;

// 256-bit unaligned integer store: only the micro-op count is overridden here.
def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;

defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;

// Masked vector integer stores are marked unsupported in this model.
defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;

// Vector integer register moves.
defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;

// MOVDQArr: no fields are overridden, so the SchedWriteRes defaults apply.
def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
}
def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>;

def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>;

// Moves between the vector and general-purpose register files.
defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 11>;
defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 11, [1, 2], 2>;

// Vector integer ALU; the Y and Z classes are marked unsupported.
defm : PdWriteResXMMPair<WriteVecALU, [PdFPU23, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU23, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;

defm : PdWriteResXMMPair<WriteVecShift, [PdFPU1, PdFPXBR], 3>;
// Vector shifts by a register count (Y/Z widths unsupported).
// NOTE(review): the positional arguments of PdWriteResXMMPair appear to be
// <write, resources, latency, per-resource cycles, micro-ops>; confirm against
// the multiclass definition earlier in this file.
defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU1, PdFPXBR], 3>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;

// Vector shifts by an immediate (Y/Z widths unsupported).
defm : PdWriteResXMMPair<WriteVecShiftImm,  [PdFPU1, PdFPXBR], 2>;
defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU1, PdFPXBR], 2>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;

// Vector integer multiplies (Y/Z widths unsupported).
defm : PdWriteResXMMPair<WriteVecIMul,  [PdFPU0, PdFPMMA], 4>;
defm : PdWriteResXMMPair<WriteVecIMulX, [PdFPU0, PdFPMMA], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;

// PMULLD uses both the PdFPMMA and PdFPMAL units.
defm : PdWriteResXMMPair<WritePMULLD, [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL], 5, [2, 1, 2, 1]>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;

// Per-instruction override for the register forms of VPMACS*DQ*: Latency 4.
def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPMMA, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
                                      VPMACSSDQLrr)>;

// FIXME: Investigate RR vs RM differences.
// MPSADBW: the register and load forms are modelled with separate PdWriteRes
// records rather than one Pair multiclass.
// NOTE(review): the positional arguments of the PdWriteRes* multiclasses
// appear to be <write, resources, latency, per-resource cycles, micro-ops
// [, load micro-op adjustment]>; confirm against their definitions earlier
// in this file.
defm : PdWriteRes<WriteMPSAD,   [PdFPU0, PdFPMMA], 8, [1, 4], 8>;
defm : PdWriteRes<WriteMPSADLd, [PdFPU0, PdFPMMA, PdLoad], 14, [1, 4, 3], 8>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;

// PSADBW (Y/Z widths unsupported).
defm : PdWriteResXMMPair<WritePSADBW,  [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;

defm : PdWriteResXMMPair<WritePHMINPOS, [PdFPU0, PdFPMAL], 4, [], 2>;

// Fixed shuffles.
defm : PdWriteResXMMPair<WriteShuffle,  [PdFPU1, PdFPXBR], 2>;
defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU1, PdFPXBR], 2>;
defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU1, PdFPXBR], 2, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;

// Variable shuffles (Y/Z widths unsupported).
defm : PdWriteResXMMPair<WriteVarShuffle,  [PdFPU1, PdFPXBR], 3>;
defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU1, PdFPXBR], 3>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;

// Per-instruction override for the register forms of VPPERM: Latency 2.
def PdWriteVPPERM : SchedWriteRes<[PdFPU1, PdFPXBR]> {
  let Latency = 2;
  let ReleaseAtCycles = [1, 1];
}
def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;

// Per-instruction override for the memory forms of VPPERM: adds PdLoad and
// uses Latency 7.
def PdWriteVPPERMLd : SchedWriteRes<[PdFPU1, PdFPXBR, PdLoad]> {
  let Latency = 7;
  let ReleaseAtCycles = [1, 1, 3];
}
def : InstRW<[PdWriteVPPERMLd], (instrs VPPERMrrm, VPPERMrmr)>;

// Fixed blends (Y/Z widths unsupported).
defm : PdWriteResXMMPair<WriteBlend, [PdFPU23, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;

// Variable blends (Y/Z widths unsupported).
defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU1, PdFPXBR], 2>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;

// Vector logic operations (Y/Z widths unsupported).
defm : PdWriteResXMMPair<WriteVecLogic,  [PdFPU23, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU23, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;

// Vector tests; these also use the integer pipe PdEX0.
defm : PdWriteResXMMPair<WriteVecTest,  [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;

// 256-bit shuffle/move classes; all parameters are left at the multiclass
// defaults.
defm : PdWriteResXMMPair<WriteShuffle256,    [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVPMOV256,      [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;

// Variable per-element vector shifts (Y/Z widths unsupported).
defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU1, PdFPXBR], 3>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteVecInsert,   [PdFPU01, PdFPMAL], 2, [1, 3], 2>;
defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [1, 4, 3], 2>;

defm : PdWriteRes<WriteVecExtract,   [PdFPU0, PdFPFMA, PdEX0], 12, [1, 3, 1], 2>;
defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [2, 1, 1], 2>;

// Per-instruction override for EXTRQ / EXTRQI: Latency 3.
def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 3];
}
def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;

////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////

// NOTE(review): the positional arguments of the PdWriteRes* multiclasses
// appear to be <write, resources, latency, per-resource cycles, micro-ops
// [, load micro-op adjustment]>; confirm against their definitions earlier
// in this file.

// Implicit-length string compares (index and mask results).
defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 11, [1, 6, 1], 7, 1>;
defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0],  7, [1, 8, 1], 7, 2>;

// Explicit-length string compares (index and mask results); both are modelled
// with 27 micro-ops and also use PdStore and PdLoad.
defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 14, [1, 10, 10, 10, 1, 1], 27, 1>;
defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 10, 10, 10, 1, 1], 27, 1>;

////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;

defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
// defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;

defm : PdWriteRes<WriteMMXMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////

// NOTE(review): the positional arguments of the PdWriteRes*Pair multiclasses
// appear to be <write, resources, latency, per-resource cycles, micro-ops,
// load micro-op adjustment>; confirm against their definitions earlier in
// this file.
defm : PdWriteResXMMPair<WriteAESIMC,    [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESKeyGen, [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

// Floating-point horizontal add/sub (Z width unsupported).
defm : PdWriteResXMMPair<WriteFHAdd,  [PdFPU0, PdFPFMA], 11, [1, 5], 3, 1>;
defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [1, 8], 8, 2>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;

// Integer horizontal add/sub (Y/Z widths unsupported).
defm : PdWriteResXMMPair<WritePHAdd,  [PdFPU01, PdFPMAL], 5, [1, 4], 3, 1>;
defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;

// Bind the XMM register forms listed here to WritePHAdd.
def : InstRW<[WritePHAdd], (instrs PHADDDrr, PHSUBDrr,
                                   PHADDWrr, PHSUBWrr,
                                   PHADDSWrr, PHSUBSWrr,
                                   VPHADDDrr, VPHSUBDrr,
                                   VPHADDWrr, VPHSUBWrr,
                                   VPHADDSWrr, VPHSUBSWrr)>;

// Bind the matching memory forms to the folded-load variant of WritePHAdd.
def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm,
                                          PHADDWrm, PHSUBWrm,
                                          PHADDSWrm, PHSUBSWrm,
                                          VPHADDDrm, VPHSUBDrm,
                                          VPHADDWrm, VPHSUBWrm,
                                          VPHADDSWrm, VPHSUBSWrm)>;

////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [1, 7], 5, 1>;

// Per-instruction override for VPCLMULQDQrri: same resources, latency and
// ReleaseAtCycles as the WriteCLMul record, but 6 micro-ops instead of 5.
def PdWriteVPCLMULQDQrri : SchedWriteRes<[PdFPU0, PdFPMMA]> {
  let Latency = 12;
  let ReleaseAtCycles = [1, 7];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteVPCLMULQDQrri], (instrs VPCLMULQDQrri)>;

////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////

// Per-instruction override for INSERTQ: Latency 3.
def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 2];
}
def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ)>;

// Per-instruction override for INSERTQI: same latency as INSERTQ, but PdFPMAL
// is released after 3 cycles instead of 2.
def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 3];
}
def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;

////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////

// Per-instruction override for the YMM broadcast-from-memory forms: Latency 6,
// 2 micro-ops.
def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
  let Latency = 6;
  let ReleaseAtCycles = [1, 2, 4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
                                                          VBROADCASTSSYrm)>;

// VZEROALL: no processor resources listed; Latency 90, 32 micro-ops.
def PdWriteVZEROALL : SchedWriteRes<[]> {
  let Latency = 90;
  let NumMicroOps = 32;
}
def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>;

// VZEROUPPER: no processor resources listed; Latency 46, 16 micro-ops.
def PdWriteVZEROUPPER : SchedWriteRes<[]> {
  let Latency = 46;
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>;

///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////

// Write with no resources and zero latency; used as the zero-idiom arm of the
// variants below.
def PdWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// GPR zero-idioms: PdWriteZeroLatency when ZeroIdiomPredicate holds, otherwise
// the regular WriteALU.
def PdWriteZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteALU]>
]>;
def : InstRW<[PdWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
                                         XOR32rr, XOR64rr)>;

// XMM floating-point logic zero-idioms: PdWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise the regular WriteFLogic.
def PdWriteFZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteFLogic]>
]>;
def : InstRW<[PdWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
                                          XORPDrr, VXORPDrr,
                                          ANDNPSrr, VANDNPSrr,
                                          ANDNPDrr, VANDNPDrr)>;

// VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have latency of 1.
// Each variant below selects PdWriteZeroLatency when ZeroIdiomPredicate holds
// and otherwise falls back to the regular write class named in its TruePred arm.

// MMX logic zero-idioms (fallback: WriteVecLogic).
def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteVecLogic]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORrr, MMX_PANDNrr)>;

// XMM logic zero-idioms (fallback: WriteVecLogicX).
def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteVecLogicX]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
                                                PANDNrr, VPANDNrr)>;

// MMX ALU zero-idioms (fallback: WriteVecALU).
def PdWriteVZeroIdiomALU : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteVecALU]>
]>;
def : InstRW<[PdWriteVZeroIdiomALU], (instrs MMX_PSUBBrr, MMX_PSUBDrr,
                                             MMX_PSUBQrr, MMX_PSUBWrr,
                                             MMX_PCMPGTBrr,
                                             MMX_PCMPGTDrr,
                                             MMX_PCMPGTWrr)>;

// XMM ALU zero-idioms (fallback: WriteVecALUX).
def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteVecALUX]>
]>;
def : InstRW<[PdWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
                                              PSUBDrr, VPSUBDrr,
                                              PSUBQrr, VPSUBQrr,
                                              PSUBWrr, VPSUBWrr,
                                              PCMPGTBrr, VPCMPGTBrr,
                                              PCMPGTDrr, VPCMPGTDrr,
                                              PCMPGTWrr, VPCMPGTWrr)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// VPCMPGTQ, but not PCMPGTQ!
// (VPCMPGTQrr appears in the AVX zero-idiom class below; PCMPGTQrr is absent
// from the SSE class.)

// NOTE(review): the classes below also list the saturating subtracts
// (*PSUBS*/*PSUBUS*) and VPCMPGTQrr, none of which appear in the zero-idiom
// InstRW lists directly above; confirm that this is intentional.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,

  // MMX Zero-idioms.
  DepBreakingClass<[
    MMX_PXORrr, MMX_PANDNrr, MMX_PSUBBrr,
    MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
    MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr,
    MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // SSE Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,

    // int variants.
    PXORrr, PANDNrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX Zero-idioms.
  DepBreakingClass<[
    // xmm fp variants.
    VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,

    // xmm int variants.
    VPXORrr, VPANDNrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

    // ymm variants.
    VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
  ], ZeroIdiomPredicate>
]>;

// Instructions registered as dependency-breaking. Every class is gated on
// ZeroIdiomPredicate except CMP32rr/CMP64rr, which use
// CheckSameRegOperand<0, 1>.
def : IsDepBreakingFunction<[
  // GPR
  DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
  DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,

  // MMX
  DepBreakingClass<[
    MMX_PCMPEQBrr, MMX_PCMPEQDrr, MMX_PCMPEQWrr
  ], ZeroIdiomPredicate>,

  // SSE
  DepBreakingClass<[
    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
    // But not PCMPEQQrr.
  ], ZeroIdiomPredicate>,

  // AVX
  DepBreakingClass<[
    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
    // But not VPCMPEQQrr.
  ], ZeroIdiomPredicate>
]>;


} // SchedModel