xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBdVer2.td (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
20b57cec5SDimitry Andric//
30b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric//
70b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric//
90b57cec5SDimitry Andric// This file defines the machine model for AMD bdver2 (Piledriver) to support
100b57cec5SDimitry Andric// instruction scheduling and other instruction cost heuristics.
110b57cec5SDimitry Andric// Based on:
120b57cec5SDimitry Andric//  * AMD Software Optimization Guide for AMD Family 15h Processors.
130b57cec5SDimitry Andric//    https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
140b57cec5SDimitry Andric//  * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
150b57cec5SDimitry Andric//    http://www.agner.org/optimize/microarchitecture.pdf
160b57cec5SDimitry Andric//  * https://www.realworldtech.com/bulldozer/
170b57cec5SDimitry Andric//    Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
180b57cec5SDimitry Andric//
190b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
200b57cec5SDimitry Andric
210b57cec5SDimitry Andricdef BdVer2Model : SchedMachineModel {
220b57cec5SDimitry Andric  let IssueWidth = 4; // Up to 4 IPC can be decoded, issued, retired.
230b57cec5SDimitry Andric  let MicroOpBufferSize = 128; // RCU reorder buffer size, which is unconfirmed.
240b57cec5SDimitry Andric  let LoopMicroOpBufferSize = -1; // There does not seem to be a loop buffer.
250b57cec5SDimitry Andric  let LoadLatency = 4; // L1 data cache has a 4-cycle load-to-use latency.
260b57cec5SDimitry Andric  let HighLatency = 25; // FIXME: any better choice?
270b57cec5SDimitry Andric  let MispredictPenalty = 20; // Minimum branch misdirection penalty.
280b57cec5SDimitry Andric
290b57cec5SDimitry Andric  let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
300b57cec5SDimitry Andric
310b57cec5SDimitry Andric  // FIXME: Incomplete. This flag is set to allow the scheduler to assign
320b57cec5SDimitry Andric  //        a default model to unrecognized opcodes.
330b57cec5SDimitry Andric  let CompleteModel = 0;
340b57cec5SDimitry Andric} // SchedMachineModel
350b57cec5SDimitry Andric
360b57cec5SDimitry Andriclet SchedModel = BdVer2Model in {
370b57cec5SDimitry Andric
380b57cec5SDimitry Andric
390b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
400b57cec5SDimitry Andric// Pipes
410b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
420b57cec5SDimitry Andric
430b57cec5SDimitry Andric// There are a total of eight pipes.
440b57cec5SDimitry Andric
450b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
460b57cec5SDimitry Andric// Integer execution pipes
470b57cec5SDimitry Andric//
480b57cec5SDimitry Andric
490b57cec5SDimitry Andric// Two EX (ALU) pipes.
500b57cec5SDimitry Andricdef PdEX0  : ProcResource<1>; // ALU, Integer Pipe0
510b57cec5SDimitry Andricdef PdEX1  : ProcResource<1>; // ALU, Integer Pipe1
520b57cec5SDimitry Andricdef PdEX01 : ProcResGroup<[PdEX0, PdEX1]>;
530b57cec5SDimitry Andric
540b57cec5SDimitry Andric// Two AGLU pipes, identical.
550b57cec5SDimitry Andricdef PdAGLU01 : ProcResource<2>; // AGU, Integer Pipe[23]
560b57cec5SDimitry Andric
570b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
580b57cec5SDimitry Andric// Floating point execution pipes
590b57cec5SDimitry Andric//
600b57cec5SDimitry Andric
610b57cec5SDimitry Andric// Four FPU pipes.
620b57cec5SDimitry Andric
630b57cec5SDimitry Andricdef PdFPU0 : ProcResource<1>; // Vector/FPU Pipe0
640b57cec5SDimitry Andricdef PdFPU1 : ProcResource<1>; // Vector/FPU Pipe1
650b57cec5SDimitry Andricdef PdFPU2 : ProcResource<1>; // Vector/FPU Pipe2
660b57cec5SDimitry Andricdef PdFPU3 : ProcResource<1>; // Vector/FPU Pipe3
670b57cec5SDimitry Andric
680b57cec5SDimitry Andric// FPU grouping
690b57cec5SDimitry Andricdef PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>;
700b57cec5SDimitry Andricdef PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>;
710b57cec5SDimitry Andric
720b57cec5SDimitry Andric
730b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
740b57cec5SDimitry Andric// RCU
750b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
760b57cec5SDimitry Andric
770b57cec5SDimitry Andric// The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle.
780b57cec5SDimitry Andric// On the other hand, the RCU reorder buffer size for Piledriver does not
790b57cec5SDimitry Andric// seem to be specified in any trustworthy source.
800b57cec5SDimitry Andric// But as per https://www.realworldtech.com/bulldozer/6/ the Bulldozer had
810b57cec5SDimitry Andric// RCU reorder buffer size of 128. So that is a good guess for now.
820b57cec5SDimitry Andricdef PdRCU : RetireControlUnit<128, 4>;
830b57cec5SDimitry Andric
840b57cec5SDimitry Andric
850b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
860b57cec5SDimitry Andric// Pipelines
870b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
880b57cec5SDimitry Andric
890b57cec5SDimitry Andric// There are a total of two pipelines, each one with its own scheduler.
900b57cec5SDimitry Andric
910b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
920b57cec5SDimitry Andric// Integer Pipeline Scheduling
930b57cec5SDimitry Andric//
940b57cec5SDimitry Andric
950b57cec5SDimitry Andric// There is one Integer Scheduler per core.
960b57cec5SDimitry Andric
970b57cec5SDimitry Andric// Integer physical register file has 96 registers of 64-bit.
980b57cec5SDimitry Andricdef PdIntegerPRF : RegisterFile<96, [GR64, CCR]>;
990b57cec5SDimitry Andric
1000b57cec5SDimitry Andric// Unified Integer, Memory Scheduler has 40 entries.
1010b57cec5SDimitry Andricdef PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> {
1020b57cec5SDimitry Andric  // One buffer entry per scheduler entry (40), shared by the EX and AGLU pipes.
1030b57cec5SDimitry Andric  let BufferSize = 40;
1040b57cec5SDimitry Andric}
1050b57cec5SDimitry Andric
1060b57cec5SDimitry Andric
1070b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1080b57cec5SDimitry Andric// FPU Pipeline Scheduling
1090b57cec5SDimitry Andric//
1100b57cec5SDimitry Andric
1110b57cec5SDimitry Andric// The FPU unit is shared between the two cores.
1120b57cec5SDimitry Andric
1130b57cec5SDimitry Andric// FP physical register file has 160 registers of 128-bit.
1140b57cec5SDimitry Andric// Operations on 256-bit data types are cracked into two COPs.
1150b57cec5SDimitry Andricdef PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;
1160b57cec5SDimitry Andric
1170b57cec5SDimitry Andric// Unified FP Scheduler has 64 entries.
1180b57cec5SDimitry Andricdef PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> {
1190b57cec5SDimitry Andric  // One buffer entry per scheduler entry (64), shared by all four FPU pipes.
1200b57cec5SDimitry Andric  let BufferSize = 64;
1210b57cec5SDimitry Andric}
1220b57cec5SDimitry Andric
1230b57cec5SDimitry Andric
1240b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1250b57cec5SDimitry Andric// Functional units
1260b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1270b57cec5SDimitry Andric
1280b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1290b57cec5SDimitry Andric// Load-Store Units
1300b57cec5SDimitry Andric//
1310b57cec5SDimitry Andric
// Load unit: declared with PdAGLU01 as its Super resource and with two units.
1320b57cec5SDimitry Andriclet Super = PdAGLU01 in
1330b57cec5SDimitry Andricdef PdLoad  : ProcResource<2> {
1340b57cec5SDimitry Andric  // For Piledriver, the load queue is 40 entries deep.
1350b57cec5SDimitry Andric  let BufferSize = 40;
1360b57cec5SDimitry Andric}
1370b57cec5SDimitry Andric
// Ties the model's load-queue descriptor to the PdLoad resource.
1380b57cec5SDimitry Andricdef PdLoadQueue : LoadQueue<PdLoad>;
1390b57cec5SDimitry Andric
// Store unit: declared with PdAGLU01 as its Super resource and with one unit.
1400b57cec5SDimitry Andriclet Super = PdAGLU01 in
1410b57cec5SDimitry Andricdef PdStore : ProcResource<1> {
1420b57cec5SDimitry Andric  // For Piledriver, the store queue is 24 entries deep.
1430b57cec5SDimitry Andric  let BufferSize = 24;
1440b57cec5SDimitry Andric}
1450b57cec5SDimitry Andric
// Ties the model's store-queue descriptor to the PdStore resource.
1460b57cec5SDimitry Andricdef PdStoreQueue : StoreQueue<PdStore>;
1470b57cec5SDimitry Andric
1480b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1490b57cec5SDimitry Andric// Integer Execution Units
1500b57cec5SDimitry Andric//
1510b57cec5SDimitry Andric
// Each unit below is annotated with the integer pipe it sits behind.
// NOTE(review): the WriteDiv*/WriteIDiv* entries later in this file pair PdDiv
// with PdEX1, not PdEX0 as annotated here -- confirm which pipe is intended.
1520b57cec5SDimitry Andricdef PdDiv    : ProcResource<1>; // PdEX0; unpipelined integer division
1530b57cec5SDimitry Andricdef PdCount  : ProcResource<1>; // PdEX0; POPCNT, LZCNT
1540b57cec5SDimitry Andric
1550b57cec5SDimitry Andricdef PdMul    : ProcResource<1>; // PdEX1; integer multiplication
1560b57cec5SDimitry Andricdef PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches
1570b57cec5SDimitry Andric
1580b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1590b57cec5SDimitry Andric// Floating-Point Units
1600b57cec5SDimitry Andric//
1610b57cec5SDimitry Andric
1620b57cec5SDimitry Andric// Two FMAC/FPFMA units.
1630b57cec5SDimitry Andricdef PdFPFMA  : ProcResource<2>; // PdFPU0, PdFPU1
1640b57cec5SDimitry Andric
1650b57cec5SDimitry Andric// One 128-bit integer multiply-accumulate unit.
1660b57cec5SDimitry Andricdef PdFPMMA  : ProcResource<1>; // PdFPU0
1670b57cec5SDimitry Andric
1680b57cec5SDimitry Andric// One fp conversion unit.
1690b57cec5SDimitry Andricdef PdFPCVT  : ProcResource<1>; // PdFPU0
1700b57cec5SDimitry Andric
1710b57cec5SDimitry Andric// One unit for shuffles, packs, permutes, shifts.
1720b57cec5SDimitry Andricdef PdFPXBR  : ProcResource<1>; // PdFPU1
1730b57cec5SDimitry Andric
1740b57cec5SDimitry Andric// Two 128-bit packed integer units.
1750b57cec5SDimitry Andricdef PdFPMAL  : ProcResource<2>; // PdFPU2, PdFPU3
1760b57cec5SDimitry Andric
1770b57cec5SDimitry Andric// One FP store unit.
1780b57cec5SDimitry Andricdef PdFPSTO  : ProcResource<1>; // PdFPU3
1790b57cec5SDimitry Andric
1800b57cec5SDimitry Andric
1810b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1820b57cec5SDimitry Andric// Basic helper classes.
1830b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1840b57cec5SDimitry Andric
1850b57cec5SDimitry Andric// Many SchedWrites are defined in pairs with and without a folded load.
1860b57cec5SDimitry Andric// Instructions with folded loads are usually micro-fused, so they only appear
1870b57cec5SDimitry Andric// as two micro-ops when dispatched by the schedulers.
1880b57cec5SDimitry Andric// PdWriteRes itself describes a single SchedWrite; the *Pair multiclasses
1890b57cec5SDimitry Andric// below instantiate it twice to cover the folded-load variant as well.
//
// Parameters:
//   SchedRW  - the SchedWrite being described.
//   ExePorts - resources consumed by the write.
//   Lat      - value assigned to Latency (default 1).
//   Res      - value assigned to ReleaseAtCycles (default: empty list).
//   UOps     - value assigned to NumMicroOps (default 1).
1900b57cec5SDimitry Andricmulticlass PdWriteRes<SchedWrite SchedRW,
1910b57cec5SDimitry Andric                      list<ProcResourceKind> ExePorts, int Lat = 1,
1920b57cec5SDimitry Andric                      list<int> Res = [], int UOps = 1> {
1930b57cec5SDimitry Andric  def : WriteRes<SchedRW, ExePorts> {
1940b57cec5SDimitry Andric    let Latency = Lat;
1955f757f3fSDimitry Andric    let ReleaseAtCycles = Res;
1960b57cec5SDimitry Andric    let NumMicroOps = UOps;
1970b57cec5SDimitry Andric  }
1980b57cec5SDimitry Andric}
1990b57cec5SDimitry Andric
// Emits two PdWriteRes records for a foldable write: one for SchedRW itself and
// one for SchedRW.Folded (the variant with a folded load).
//
// The folded record is derived from the register-form parameters as follows:
//   ports       = [PdLoad] followed by ExePorts
//   latency     = Lat + LoadLat
//   micro-ops   = UOps + LoadUOps
//   release     = []                         if Res is empty and LoadRes == 1
//                 [LoadRes] ++ Res           if Res is non-empty
//                 [LoadRes] ++ [1, 1, ...]   otherwise (one 1 per ExePorts entry)
2000b57cec5SDimitry Andricmulticlass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
2010b57cec5SDimitry Andric                            list<ProcResourceKind> ExePorts, int Lat,
2020b57cec5SDimitry Andric                            list<int> Res, int UOps,
2030b57cec5SDimitry Andric                            int LoadLat, int LoadRes, int LoadUOps> {
  // Register form: parameters are forwarded unchanged.
2040b57cec5SDimitry Andric  defm : PdWriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
2050b57cec5SDimitry Andric
  // Folded-load form: PdLoad is prepended to the port list, so LoadRes is
  // prepended to the release-cycle list to keep both lists aligned.
2060b57cec5SDimitry Andric  defm : PdWriteRes<SchedRW.Folded,
2070b57cec5SDimitry Andric                    !listconcat([PdLoad], ExePorts),
2080b57cec5SDimitry Andric                    !add(Lat, LoadLat),
2090b57cec5SDimitry Andric                    !if(!and(!empty(Res), !eq(LoadRes, 1)),
2100b57cec5SDimitry Andric                      [],
2110b57cec5SDimitry Andric                      !listconcat([LoadRes],
2120b57cec5SDimitry Andric                        !if(!empty(Res),
2130b57cec5SDimitry Andric                          !listsplat(1, !size(ExePorts)),
2140b57cec5SDimitry Andric                          Res))),
2150b57cec5SDimitry Andric                    !add(UOps, LoadUOps)>;
2160b57cec5SDimitry Andric}
2170b57cec5SDimitry Andric
// Pair wrapper that forwards to __pdWriteResPair with LoadLat = 4 and
// LoadRes = 3. The LoadLat of 4 matches the ReadAfterLd advance used in this
// file. LoadUOps (default 0) is added to UOps for the folded variant only.
2180b57cec5SDimitry Andricmulticlass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
2190b57cec5SDimitry Andric                            list<ProcResourceKind> ExePorts, int Lat = 1,
2200b57cec5SDimitry Andric                            list<int> Res = [], int UOps = 1,
2210b57cec5SDimitry Andric                            int LoadUOps = 0> {
2220b57cec5SDimitry Andric  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
2230b57cec5SDimitry Andric                          /*LoadLat*/4, /*LoadRes*/3, LoadUOps>;
2240b57cec5SDimitry Andric}
2250b57cec5SDimitry Andric
// Pair wrapper that forwards to __pdWriteResPair with LoadLat = 5 and
// LoadRes = 3. The LoadLat of 5 matches the ReadAfterVec*Ld advances used in
// this file. LoadUOps (default 0) is added to UOps for the folded variant only.
2260b57cec5SDimitry Andricmulticlass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
2270b57cec5SDimitry Andric                             list<ProcResourceKind> ExePorts, int Lat = 1,
2280b57cec5SDimitry Andric                             list<int> Res = [], int UOps = 1,
2290b57cec5SDimitry Andric                             int LoadUOps = 0> {
2300b57cec5SDimitry Andric  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
2310b57cec5SDimitry Andric                           /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
2320b57cec5SDimitry Andric}
2330b57cec5SDimitry Andric
// Pair wrapper that forwards to __pdWriteResPair with LoadLat = 5 and
// LoadRes = 3, the same load parameters as PdWriteResXMMPair. It differs from
// that wrapper in two ways: Lat has no default here, and UOps defaults to 2.
2340b57cec5SDimitry Andricmulticlass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
2350b57cec5SDimitry Andric                             list<ProcResourceKind> ExePorts, int Lat,
2360b57cec5SDimitry Andric                             list<int> Res = [], int UOps = 2,
2370b57cec5SDimitry Andric                             int LoadUOps = 0> {
2380b57cec5SDimitry Andric  defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
2390b57cec5SDimitry Andric                           /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
2400b57cec5SDimitry Andric}
2410b57cec5SDimitry Andric
2420b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
2430b57cec5SDimitry Andric// Here be dragons.
2440b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
2450b57cec5SDimitry Andric
2460b57cec5SDimitry Andric// L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers
2470b57cec5SDimitry Andric// needn't be available until 4 cycles after the memory operand.
2480b57cec5SDimitry Andricdef : ReadAdvance<ReadAfterLd, 4>;
2490b57cec5SDimitry Andric
2500b57cec5SDimitry Andric// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available
2510b57cec5SDimitry Andric// until 5 cycles after the memory operand.
2520b57cec5SDimitry Andricdef : ReadAdvance<ReadAfterVecLd, 5>;
2530b57cec5SDimitry Andricdef : ReadAdvance<ReadAfterVecXLd, 5>;
2540b57cec5SDimitry Andricdef : ReadAdvance<ReadAfterVecYLd, 5>;
2550b57cec5SDimitry Andric
2560b57cec5SDimitry Andric// Transfer from int domain to ivec domain incurs additional latency of 8..10cy
2570b57cec5SDimitry Andric// Reference: Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller
2580b57cec5SDimitry Andric// and Excavator pipeline", "Data delay between different execution domains"
2590b57cec5SDimitry Andricdef : ReadAdvance<ReadInt2Fpu, -10>;
2600b57cec5SDimitry Andric
2610b57cec5SDimitry Andric// A folded store needs a cycle on the PdStore for the store data.
2620b57cec5SDimitry Andricdef : WriteRes<WriteRMW, [PdStore]>;
2630b57cec5SDimitry Andric
2640b57cec5SDimitry Andric////////////////////////////////////////////////////////////////////////////////
2650b57cec5SDimitry Andric// Loads, stores, and moves, not folded with other operations.
2660b57cec5SDimitry Andric////////////////////////////////////////////////////////////////////////////////
2670b57cec5SDimitry Andric
2685f757f3fSDimitry Andricdef : WriteRes<WriteLoad,    [PdLoad]> { let Latency = 5; let ReleaseAtCycles = [2]; }
2690b57cec5SDimitry Andricdef : WriteRes<WriteStore,   [PdStore]>;
2700b57cec5SDimitry Andricdef : WriteRes<WriteStoreNT, [PdStore]>;
2715f757f3fSDimitry Andricdef : WriteRes<WriteMove,    [PdEX01]> { let ReleaseAtCycles = [2]; }
272fe6060f1SDimitry Andricdefm : X86WriteResUnsupported<WriteVecMaskedGatherWriteback>;
2730b57cec5SDimitry Andric
2740b57cec5SDimitry Andric// Load/store MXCSR.
2750b57cec5SDimitry Andric// FIXME: These are copy and pasted from WriteLoad/Store.
2760b57cec5SDimitry Andricdef : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; }
2775f757f3fSDimitry Andricdef : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; let ReleaseAtCycles = [18]; }
2780b57cec5SDimitry Andric
2790b57cec5SDimitry Andric// Treat misc copies as a move.
2800b57cec5SDimitry Andricdef : InstRW<[WriteMove], (instrs COPY)>;
2810b57cec5SDimitry Andric
2820b57cec5SDimitry Andric////////////////////////////////////////////////////////////////////////////////
2830b57cec5SDimitry Andric// Idioms that clear a register, like xorps %xmm0, %xmm0.
2840b57cec5SDimitry Andric// These can often bypass execution ports completely.
2850b57cec5SDimitry Andric////////////////////////////////////////////////////////////////////////////////
2860b57cec5SDimitry Andric
2870b57cec5SDimitry Andricdef : WriteRes<WriteZero, [/*No ExePorts*/]>;
2880b57cec5SDimitry Andric
2890b57cec5SDimitry Andric////////////////////////////////////////////////////////////////////////////////
2900b57cec5SDimitry Andric// Branches don't produce values, so they have no latency, but they still
2910b57cec5SDimitry Andric// consume resources. Indirect branches can fold loads.
2920b57cec5SDimitry Andric////////////////////////////////////////////////////////////////////////////////
2930b57cec5SDimitry Andric
2940b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteJump,  [PdEX1, PdBranch]>;
2950b57cec5SDimitry Andric
2960b57cec5SDimitry Andric////////////////////////////////////////////////////////////////////////////////
2970b57cec5SDimitry Andric// Special case scheduling classes.
2980b57cec5SDimitry Andric////////////////////////////////////////////////////////////////////////////////
2990b57cec5SDimitry Andric
3000b57cec5SDimitry Andricdef : WriteRes<WriteSystem,     [PdEX01]> { let Latency = 100; }
3010b57cec5SDimitry Andricdef : WriteRes<WriteMicrocoded, [PdEX01]> { let Latency = 100; }
3020b57cec5SDimitry Andricdef : WriteRes<WriteFence,      [PdStore]>;
3030b57cec5SDimitry Andric
3040b57cec5SDimitry Andricdef PdWriteXLAT : SchedWriteRes<[PdEX01]> {
3050b57cec5SDimitry Andric  let Latency = 6;
3060b57cec5SDimitry Andric}
3070b57cec5SDimitry Andricdef : InstRW<[PdWriteXLAT], (instrs XLAT)>;
3080b57cec5SDimitry Andric
3090b57cec5SDimitry Andricdef PdWriteLARrr : SchedWriteRes<[PdEX01]> {
3100b57cec5SDimitry Andric  let Latency = 184;
3115f757f3fSDimitry Andric  let ReleaseAtCycles = [375];
3120b57cec5SDimitry Andric  let NumMicroOps = 45;
3130b57cec5SDimitry Andric}
3140b57cec5SDimitry Andricdef : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
3150b57cec5SDimitry Andric                                        "LSL(16|32|64)rr")>;
3160b57cec5SDimitry Andric
3170b57cec5SDimitry Andric// Nops don't have dependencies, so there's no actual latency; Latency is not
3180b57cec5SDimitry Andric// overridden here, and the nop is still modelled as occupying an ALU pipe.
// NOTE(review): the previous wording said the nop uses an ALU slot "for a
// cycle", but this record sets ReleaseAtCycles = [2] on PdEX01 -- confirm
// which value is intended.
3195f757f3fSDimitry Andricdef : WriteRes<WriteNop, [PdEX01]> { let ReleaseAtCycles = [2]; }
3200b57cec5SDimitry Andric
3210b57cec5SDimitry Andric////////////////////////////////////////////////////////////////////////////////
3220b57cec5SDimitry Andric// Arithmetic.
3230b57cec5SDimitry Andric////////////////////////////////////////////////////////////////////////////////
3240b57cec5SDimitry Andric
3250b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteALU,     [PdEX01], 1, [2]>;
3260b57cec5SDimitry Andric
3270b57cec5SDimitry Andricdef PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> {
3280b57cec5SDimitry Andric  let Latency = 6;
3295f757f3fSDimitry Andric  let ReleaseAtCycles = [3, 2, 1];
3300b57cec5SDimitry Andric  let NumMicroOps = 1;
3310b57cec5SDimitry Andric}
3320b57cec5SDimitry Andricdef : SchedAlias<WriteALURMW, PdWriteALURMW>;
3330b57cec5SDimitry Andric
3340b57cec5SDimitry Andricdef PdWriteLXADD : SchedWriteRes<[PdEX01]> {
3350b57cec5SDimitry Andric  let Latency = 6;
3365f757f3fSDimitry Andric  let ReleaseAtCycles = [88];
3370b57cec5SDimitry Andric  let NumMicroOps = 4;
3380b57cec5SDimitry Andric}
3390b57cec5SDimitry Andricdef : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;
3400b57cec5SDimitry Andric
3410b57cec5SDimitry Andricdef PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
3420b57cec5SDimitry Andric  let Latency = 2;
3435f757f3fSDimitry Andric  let ReleaseAtCycles = [2];
3440b57cec5SDimitry Andric  let NumMicroOps = 2;
3450b57cec5SDimitry Andric}
3460b57cec5SDimitry Andricdef : InstRW<[PdWriteBMI1],
3470b57cec5SDimitry Andric             (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr,
3480b57cec5SDimitry Andric                     BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr,
3490b57cec5SDimitry Andric                     BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr,
3500b57cec5SDimitry Andric                     BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
3510b57cec5SDimitry Andric                     TZMSK32rr, TZMSK64rr)>;
3520b57cec5SDimitry Andric
3530b57cec5SDimitry Andricdef PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> {
3540b57cec5SDimitry Andric  let Latency = 6;
3555f757f3fSDimitry Andric  let ReleaseAtCycles = [3, 3];
3560b57cec5SDimitry Andric  let NumMicroOps = 2;
3570b57cec5SDimitry Andric}
3580b57cec5SDimitry Andricdef : InstRW<[PdWriteBMI1m],
3590b57cec5SDimitry Andric             (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm,
3600b57cec5SDimitry Andric                     BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm,
3610b57cec5SDimitry Andric                     BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm,
3620b57cec5SDimitry Andric                     BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm,
3630b57cec5SDimitry Andric                     TZMSK32rm, TZMSK64rm)>;
3640b57cec5SDimitry Andric
3650b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteADC,    [PdEX01],                  1,  [2]>;
3660b57cec5SDimitry Andric
3670b57cec5SDimitry Andricdef PdWriteADCSBB64ri32 : SchedWriteRes<[PdEX01]> {
3685f757f3fSDimitry Andric  let ReleaseAtCycles = [3];
3690b57cec5SDimitry Andric}
3700b57cec5SDimitry Andricdef : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>;
3710b57cec5SDimitry Andric
3720b57cec5SDimitry Andricdefm : PdWriteRes<WriteBSWAP32,      [PdEX01]>;
3730b57cec5SDimitry Andricdefm : PdWriteRes<WriteBSWAP64,      [PdEX01]>;
3740b57cec5SDimitry Andricdefm : PdWriteRes<WriteCMPXCHG,      [PdEX1],                   3,  [3],        5>;
3750b57cec5SDimitry Andricdefm : PdWriteRes<WriteCMPXCHGRMW,   [PdEX1, PdStore, PdLoad],  3,  [44, 1, 1], 2>;
3760b57cec5SDimitry Andricdefm : PdWriteRes<WriteXCHG,         [PdEX1],                   1,  [],         2>;
3770b57cec5SDimitry Andric
3780b57cec5SDimitry Andricdef PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
3790b57cec5SDimitry Andric  let Latency = 3;
3805f757f3fSDimitry Andric  let ReleaseAtCycles = [3];
3810b57cec5SDimitry Andric  let NumMicroOps = 3;
3820b57cec5SDimitry Andric}
3830b57cec5SDimitry Andricdef : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;
3840b57cec5SDimitry Andric
3850b57cec5SDimitry Andricdef PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
3860b57cec5SDimitry Andric  let Latency = 3;
3875f757f3fSDimitry Andric  let ReleaseAtCycles = [23];
3880b57cec5SDimitry Andric  let NumMicroOps = 5;
3890b57cec5SDimitry Andric}
3900b57cec5SDimitry Andricdef : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;
3910b57cec5SDimitry Andric
3920b57cec5SDimitry Andricdef PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
3930b57cec5SDimitry Andric  let Latency = 3;
3945f757f3fSDimitry Andric  let ReleaseAtCycles = [21];
3950b57cec5SDimitry Andric  let NumMicroOps = 6;
3960b57cec5SDimitry Andric}
3970b57cec5SDimitry Andricdef : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
3980b57cec5SDimitry Andric             (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
3990b57cec5SDimitry Andric
4000b57cec5SDimitry Andricdef PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
4010b57cec5SDimitry Andric  let Latency = 3;
4025f757f3fSDimitry Andric  let ReleaseAtCycles = [26];
4030b57cec5SDimitry Andric  let NumMicroOps = 18;
4040b57cec5SDimitry Andric}
4050b57cec5SDimitry Andricdef : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
4060b57cec5SDimitry Andric
4070b57cec5SDimitry Andricdef PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
4080b57cec5SDimitry Andric  let Latency = 3;
4095f757f3fSDimitry Andric  let ReleaseAtCycles = [69];
4100b57cec5SDimitry Andric  let NumMicroOps = 22;
4110b57cec5SDimitry Andric}
4120b57cec5SDimitry Andricdef : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;
4130b57cec5SDimitry Andric
4140b57cec5SDimitry Andricdef PdWriteXADDm : SchedWriteRes<[PdEX1]> {
4150b57cec5SDimitry Andric  let Latency = 6;
4165f757f3fSDimitry Andric  let ReleaseAtCycles = [20];
4170b57cec5SDimitry Andric  let NumMicroOps = 4;
4180b57cec5SDimitry Andric}
4190b57cec5SDimitry Andricdef : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;
4200b57cec5SDimitry Andric
4210b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIMul8,     [PdEX1, PdMul],          4,  [1, 4]>;
4220b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIMul16,    [PdEX1, PdMul],          4,  [1, 5],    2>;
4230b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul],          5,  [1, 5],    2>;
4240b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul],          4,  [1, 2]>;
4250b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIMul32,    [PdEX1, PdMul],          4,  [1, 4]>;
4260b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul],          4,  [1, 2],    1, 1>;
4270b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul],          4,  [1, 2]>;
4280b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIMul64,    [PdEX1, PdMul],          6,  [1, 6]>;
4290b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul],          6,  [1, 4],1, 1>;
4300b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul],          6,  [1, 4]>;
431349cc55cSDimitry Andric
432349cc55cSDimitry Andric// BMI2 MULX
433349cc55cSDimitry Andricdefm : X86WriteResUnsupported<WriteIMulH>;
434349cc55cSDimitry Andricdefm : X86WriteResUnsupported<WriteIMulHLd>;
435349cc55cSDimitry Andricdefm : X86WriteResPairUnsupported<WriteMULX32>;
436349cc55cSDimitry Andricdefm : X86WriteResPairUnsupported<WriteMULX64>;
4370b57cec5SDimitry Andric
4380b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteDiv8,    [PdEX1, PdDiv],           12,  [1, 12]>;
4390b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteDiv16,   [PdEX1, PdDiv],           15,  [1, 15],   2>;
4400b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteDiv32,   [PdEX1, PdDiv],           14,  [1, 14],   2>;
4410b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteDiv64,   [PdEX1, PdDiv],           14,  [1, 14],   2>;
4420b57cec5SDimitry Andric
4430b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIDiv8,   [PdEX1, PdDiv],           12,  [1, 12]>;
4440b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIDiv16,  [PdEX1, PdDiv],           15,  [1, 17],   2>;
4450b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIDiv32,  [PdEX1, PdDiv],           14,  [1, 25],   2>;
4460b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteIDiv64,  [PdEX1, PdDiv],           14,  [1, 14],   2>;
4470b57cec5SDimitry Andric
4480b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteCRC32,   [PdEX01],                  2,  [4],       3>;
4490b57cec5SDimitry Andric
4500b57cec5SDimitry Andricdef PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
4510b57cec5SDimitry Andric  let Latency = 5;
4525f757f3fSDimitry Andric  let ReleaseAtCycles = [10];
4530b57cec5SDimitry Andric  let NumMicroOps = 5;
4540b57cec5SDimitry Andric}
4550b57cec5SDimitry Andricdef : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;
4560b57cec5SDimitry Andric
4570b57cec5SDimitry Andricdef PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
4580b57cec5SDimitry Andric  let Latency = 6;
4595f757f3fSDimitry Andric  let ReleaseAtCycles = [12];
4600b57cec5SDimitry Andric  let NumMicroOps = 7;
4610b57cec5SDimitry Andric}
4620b57cec5SDimitry Andricdef : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;
4630b57cec5SDimitry Andric
4640b57cec5SDimitry Andricdef PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
4650b57cec5SDimitry Andric  let Latency = 10;
4665f757f3fSDimitry Andric  let ReleaseAtCycles = [17];
4670b57cec5SDimitry Andric  let NumMicroOps = 11;
4680b57cec5SDimitry Andric}
4690b57cec5SDimitry Andricdef : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;
4700b57cec5SDimitry Andric
4710b57cec5SDimitry Andricdefm : PdWriteResExPair<WriteCMOV,    [PdEX01]>; // Conditional move.
4720b57cec5SDimitry Andric
// Dedicated write for the memory-form CMOV condition codes selected below:
// uses PdLoad and PdEX01, with ReleaseAtCycles entries in that same order.
4730b57cec5SDimitry Andricdef PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> {
4740b57cec5SDimitry Andric  let Latency = 5;
4755f757f3fSDimitry Andric  let ReleaseAtCycles = [3, 3];
4760b57cec5SDimitry Andric  let NumMicroOps = 2;
4770b57cec5SDimitry Andric}
4780b57cec5SDimitry Andric
// Variant write for CMOV{16,32,64}rm. The predicates test immediate operand 7
// against a condition code: BE, A, L, GE, LE and G select PdWriteCMOVm, and any
// other condition falls through to the generic WriteCMOV.Folded entry.
4790b57cec5SDimitry Andricdef PdWriteCMOVmVar : SchedWriteVariant<[
4800b57cec5SDimitry Andric  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_BE">>, [PdWriteCMOVm]>,
4810b57cec5SDimitry Andric  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_A">>,  [PdWriteCMOVm]>,
4820b57cec5SDimitry Andric  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_L">>,  [PdWriteCMOVm]>,
4830b57cec5SDimitry Andric  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_GE">>, [PdWriteCMOVm]>,
4840b57cec5SDimitry Andric  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_LE">>, [PdWriteCMOVm]>,
4850b57cec5SDimitry Andric  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_G">>,  [PdWriteCMOVm]>,
4860b57cec5SDimitry Andric  SchedVar<NoSchedPred, [WriteCMOV.Folded]>
4870b57cec5SDimitry Andric]>;
4880b57cec5SDimitry Andric
4890b57cec5SDimitry Andricdef : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
4900b57cec5SDimitry Andric
4910b57cec5SDimitry Andricdefm : PdWriteRes<WriteFCMOV,        [PdFPU0, PdFPFMA]>; // x87 conditional move.
4920b57cec5SDimitry Andric
4930b57cec5SDimitry Andricdef : WriteRes<WriteSETCC,           [PdEX01]>; // Setcc.
4940b57cec5SDimitry Andricdef : WriteRes<WriteSETCCStore,      [PdEX01, PdStore]>;
4950b57cec5SDimitry Andric
// Dedicated write for the SETCCm condition codes selected below: uses PdEX01
// only, with two micro-ops.
4960b57cec5SDimitry Andricdef PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
4975f757f3fSDimitry Andric  let ReleaseAtCycles = [2];
4980b57cec5SDimitry Andric  let NumMicroOps = 2;
4990b57cec5SDimitry Andric}
5000b57cec5SDimitry Andric
// Variant write for SETCCm. The predicates test immediate operand 5 against a
// condition code: GE, G, LE and L select the dedicated write above, and any
// other condition falls through to the generic WriteSETCCStore entry.
5010b57cec5SDimitry Andricdef PdSETGEmSETGmSETLEmSETLm :  SchedWriteVariant<[
5020b57cec5SDimitry Andric  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_GE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
5030b57cec5SDimitry Andric  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_G">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
5040b57cec5SDimitry Andric  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_LE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
5050b57cec5SDimitry Andric  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_L">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
5060b57cec5SDimitry Andric  SchedVar<NoSchedPred,                                            [WriteSETCCStore]>
5070b57cec5SDimitry Andric]>;
5080b57cec5SDimitry Andricdef : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>;
5090b57cec5SDimitry Andric
// Shared LAHF/SAHF write on PdEX01. LAHF and SAHF are additionally bound to
// their own writes below via InstRW.
// NOTE(review): the positional arguments after the resource list appear to be
// latency, per-resource cycles and micro-op count -- confirm against the
// PdWriteRes multiclass.
defm : PdWriteRes<WriteLAHFSAHF,      [PdEX01],          2,  [4],       2>;

// LAHF: latency 2, 4 micro-ops, ReleaseAtCycles entry of 4 on PdEX01.
def PdWriteLAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteLAHF], (instrs LAHF)>;

// SAHF: latency 2, 2 micro-ops, ReleaseAtCycles entry of 2 on PdEX01.
def PdWriteSAHF : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteSAHF], (instrs SAHF)>;

// Bit-test writes. Register forms use PdEX01 only; the *Ld forms add PdLoad.
// NOTE(review): the positional arguments after the resource list appear to be
// latency, per-resource cycles and micro-op count -- confirm against the
// PdWriteRes multiclass.
defm : PdWriteRes<WriteBitTest,          [PdEX01],         1, [2],      1>;
defm : PdWriteRes<WriteBitTestImmLd,     [PdEX01, PdLoad], 5, [2,  3],  1>;
defm : PdWriteRes<WriteBitTestRegLd,     [PdEX01, PdLoad], 5, [7,  2],  7>;
defm : PdWriteRes<WriteBitTestSet,       [PdEX01],         2, [2],      2>;
defm : PdWriteRes<WriteBitTestSetImmLd,  [PdEX01, PdLoad], 6, [1,  1],  4>;
defm : PdWriteRes<WriteBitTestSetRegLd,  [PdEX01, PdLoad], 6, [1,  1], 10>;

// Read-modify-write bit-test-and-set with an immediate bit index: latency 7,
// 4 micro-ops, ReleaseAtCycles of 42 on PdEX01 and 1 on PdLoad.
def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ReleaseAtCycles = [42, 1];
  let NumMicroOps = 4;
}
def : SchedAlias<WriteBitTestSetImmRMW, PdWriteBTSIm>;
// Read-modify-write bit-test-and-set with a register bit index: latency 7,
// 10 micro-ops, ReleaseAtCycles of 44 on PdEX01 and 1 on PdLoad.
def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
  let Latency = 7;
  let ReleaseAtCycles = [44, 1];
  let NumMicroOps = 10;
}
def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;

// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA,              [PdEX01]> { let ReleaseAtCycles = [2]; }

// This write is used for slow LEA instructions. It differs from WriteLEA
// above only in setting an explicit latency of 2.
def PdWrite3OpsLEA : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
}

// On Piledriver, a slow LEA is either a 3Ops LEA (base, index, offset),
// or an LEA with a `Scale` value other than 1.
def PdSlowLEAPredicate : MCSchedPredicate<
  CheckAny<[
    // A 3-operand LEA (base, index, offset).
    IsThreeOperandsLEAFn,
    // An LEA with a "Scale" other than 1: operand 2 must be an immediate
    // and must not equal 1.
    CheckAll<[
      CheckIsImmOperand<2>,
      CheckNot<CheckImmOperand<2, 1>>
    ]>
  ]>
>;

// Slow LEAs get PdWrite3OpsLEA; all others fall back to WriteLEA.
def PdWriteLEA : SchedWriteVariant<[
    SchedVar<PdSlowLEAPredicate, [PdWrite3OpsLEA]>,
    SchedVar<NoSchedPred,        [WriteLEA]>
]>;

def : InstRW<[PdWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;

// 16-bit LEA is modelled unconditionally: 2 micro-ops with a ReleaseAtCycles
// entry of 3 on PdEX01. Latency is left at the default.
def PdWriteLEA16r : SchedWriteRes<[PdEX01]> {
  let ReleaseAtCycles = [3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteLEA16r], (instrs LEA16r)>;

// Bit counts.
// BSF/BSR/POPCNT use PdEX01; LZCNT/TZCNT are restricted to PdEX0.
// NOTE(review): the positional arguments after the resource list appear to be
// latency, per-resource cycles, micro-op count and an extra micro-op count for
// the folded-load form -- confirm against the PdWriteResExPair multiclass.
defm : PdWriteResExPair<WriteBSF,     [PdEX01],          3,  [6],     6, 2>;
defm : PdWriteResExPair<WriteBSR,     [PdEX01],          4,  [8],     7, 2>;
defm : PdWriteResExPair<WritePOPCNT,  [PdEX01],          4,  [4]>;
defm : PdWriteResExPair<WriteLZCNT,   [PdEX0],           2,  [2],     2>;
defm : PdWriteResExPair<WriteTZCNT,   [PdEX0],           2,  [2],     2>;

// BMI1 BEXTR, BMI2 BZHI
defm : PdWriteResExPair<WriteBEXTR,   [PdEX01],          2,  [2],    2>;
defm : PdWriteResExPair<WriteBLS,     [PdEX01],          2,  [2],    2>;
defm : PdWriteResExPair<WriteBZHI,    [PdEX01]>;

// Immediate-control BEXTR, register source: latency 2, 2 micro-ops,
// ReleaseAtCycles entry of 4 on PdEX01.
def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>;

// Immediate-control BEXTR, memory source: same latency and micro-op count as
// the register form, with a ReleaseAtCycles entry of 5 on PdEX01.
// NOTE(review): no PdLoad resource is listed for this memory form -- confirm
// that is intentional.
def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> {
  let Latency = 2;
  let ReleaseAtCycles = [5];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>;

////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////

// Generic shift/rotate writes, all on PdEX01. The non-CL forms pass explicit
// values (1, [2]); the CL-count forms take PdWriteResExPair's defaults.
defm : PdWriteResExPair<WriteShift,    [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteShiftCL,  [PdEX01]>;
defm : PdWriteResExPair<WriteRotate,   [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;

// Per-instruction writes for rotate-through-carry, all on PdEX01. Each record
// sets latency, the PdEX01 ReleaseAtCycles entry and the micro-op count, and
// is bound to its instruction(s) with InstRW.

// RCL r8, CL.
def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ReleaseAtCycles = [24];
  let NumMicroOps = 26;
}
def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;

// RCR r8, imm.
def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 12;
  let ReleaseAtCycles = [23];
  let NumMicroOps = 23;
}
def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;

// RCR r8, CL.
def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ReleaseAtCycles = [22];
  let NumMicroOps = 24;
}
def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;

// RCL r16, CL.
def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ReleaseAtCycles = [20];
  let NumMicroOps = 22;
}
def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;

// RCR r16, imm.
def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 10;
  let ReleaseAtCycles = [19];
  let NumMicroOps = 19;
}
def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;

// 32/64-bit rotate-through-carry writes, all on PdEX01 with latency 7. The
// 32-bit and 64-bit instructions share one write per form.

// RCL r32/r64, CL.
def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ReleaseAtCycles = [14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>;

// RCR r32/r64, CL.
def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ReleaseAtCycles = [13];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>;

// RCR r32/r64, imm.
def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
  let Latency = 7;
  let ReleaseAtCycles = [14];
  let NumMicroOps = 15;
}
def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;


// Remaining rotate-through-carry writes, all on PdEX01. Each record sets
// latency, the PdEX01 ReleaseAtCycles entry and the micro-op count.

// RCR r16, CL.
def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
  let Latency = 9;
  let ReleaseAtCycles = [18];
  let NumMicroOps = 20;
}
def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;

// RCL r16, imm.
def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
  let Latency = 11;
  let ReleaseAtCycles = [21];
  let NumMicroOps = 21;
}
def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;

// RCL r32/r64, imm.
def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
  let Latency = 8;
  let ReleaseAtCycles = [15];
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;

// RCL r8, imm.
def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
  let Latency = 13;
  let ReleaseAtCycles = [25];
  let NumMicroOps = 25;
}
def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;

// SHLD/SHRD.
// Register forms use PdEX01 only.
// NOTE(review): the positional arguments after the resource list appear to be
// latency, per-resource cycles and micro-op count -- confirm against the
// PdWriteRes multiclass.
defm : PdWriteRes<WriteSHDrri,       [PdEX01],         3, [6], 6>;
defm : PdWriteRes<WriteSHDrrcl,      [PdEX01],         3, [8], 7>;

// Override for three of the CL-count register forms: same latency (3) and
// micro-op count (7) as WriteSHDrrcl above, with a ReleaseAtCycles entry of 6
// instead of 8.
def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
  let Latency = 3;
  let ReleaseAtCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
                                                              SHLD32rrCL,
                                                              SHRD32rrCL)>;

// Memory-destination forms add PdLoad; both variants use identical values.
defm : PdWriteRes<WriteSHDmri,       [PdLoad, PdEX01], 4, [1, 22], 8>;
defm : PdWriteRes<WriteSHDmrcl,      [PdLoad, PdEX01], 4, [1, 22], 8>;

////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////

// x87 constant loads: PdFPU1 + PdFPSTO, with 3 as the first numeric argument
// (presumably latency -- confirm against the PdWriteRes multiclass).
defm : PdWriteRes<WriteFLD0,               [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLD1,               [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLDC,               [PdFPU1, PdFPSTO], 3>;

// FP/vector loads. The Y form differs from the other two only in the
// trailing 2 (presumably the micro-op count).
defm : PdWriteRes<WriteFLoad,              [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteFLoadX,             [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteFLoadY,             [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3], 2>;

// Masked FP loads.
defm : PdWriteRes<WriteFMaskedLoad,        [PdLoad, PdFPU01, PdFPFMA], 6, [3, 1, 4]>;
defm : PdWriteRes<WriteFMaskedLoadY,       [PdLoad, PdFPU01, PdFPFMA], 6, [3, 2, 4], 2>;

// FP/vector stores on PdStore + PdFPU23 + PdFPSTO.
// NOTE(review): the positional arguments after the resource list appear to be
// latency, per-resource cycles and micro-op count -- confirm against the
// PdWriteRes multiclass.
defm : PdWriteRes<WriteFStore,             [PdStore, PdFPU23, PdFPSTO], 2, [1,  3, 1]>;
defm : PdWriteRes<WriteFStoreX,            [PdStore, PdFPU23, PdFPSTO], 1, [1,  3, 1]>;
defm : PdWriteRes<WriteFStoreY,            [PdStore, PdFPU23, PdFPSTO], 1, [1, 36, 2], 4>;

// MOVHPS/MOVHPD stores (SSE and VEX): latency 2, 2 micro-ops.
def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23,  PdFPSTO]> {
  let Latency = 2;
  let ReleaseAtCycles = [1, 3, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;

// 256-bit unaligned stores: only the micro-op count (8) is overridden;
// latency and ReleaseAtCycles keep their defaults.
def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1,  PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>;

// Non-temporal FP stores use PdFPU1 rather than PdFPU23.
defm : PdWriteRes<WriteFStoreNT,           [PdStore, PdFPU1,  PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTX,          [PdStore, PdFPU1,  PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTY,          [PdStore, PdFPU1,  PdFPSTO], 3, [2, 2, 2], 4>;

// Masked FP stores. The Y forms double every cycle entry of the 128-bit
// forms and use 34 instead of 18 as the last argument.
defm : PdWriteRes<WriteFMaskedStore32,     [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
defm : PdWriteRes<WriteFMaskedStore64,     [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
defm : PdWriteRes<WriteFMaskedStore32Y,    [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
defm : PdWriteRes<WriteFMaskedStore64Y,    [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;

// FP register moves on PdFPU01 + PdFPFMA. The Z (512-bit) form is marked
// unsupported for this model.
defm : PdWriteRes<WriteFMove,              [PdFPU01, PdFPFMA]>;
defm : PdWriteRes<WriteFMoveX,             [PdFPU01, PdFPFMA], 1, [1, 2]>;
defm : PdWriteRes<WriteFMoveY,             [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
defm : X86WriteResUnsupported<WriteFMoveZ>;

defm : PdWriteRes<WriteEMMS,               [PdFPU01, PdFPFMA], 2>;

// FP add (single precision) on PdFPU0 + PdFPFMA; 5 is the first numeric
// argument (presumably latency -- confirm against the PdWriteRes*Pair
// multiclasses). The Z form is unsupported.
defm : PdWriteResXMMPair<WriteFAdd,         [PdFPU0, PdFPFMA],  5>;
defm : PdWriteResXMMPair<WriteFAddX,        [PdFPU0, PdFPFMA],  5>;
defm : PdWriteResYMMPair<WriteFAddY,        [PdFPU0, PdFPFMA],  5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;

// x87 add/subtract with a memory operand: latency 5, ReleaseAtCycles of 3 on
// PdLoad, 1 on PdFPU0 and 10 on PdFPFMA.
def PdWriteX87Add: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 5;
  let ReleaseAtCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Add], (instrs ADD_FI16m,  ADD_FI32m,  ADD_F32m,  ADD_F64m,
                                      SUB_FI16m,  SUB_FI32m,  SUB_F32m,  SUB_F64m,
                                      SUBR_FI16m, SUBR_FI32m, SUBR_F32m, SUBR_F64m)>;

// FP add (double precision): same values as the single-precision writes.
defm : PdWriteResXMMPair<WriteFAdd64,       [PdFPU0, PdFPFMA],  5>;
defm : PdWriteResXMMPair<WriteFAdd64X,      [PdFPU0, PdFPFMA],  5>;
defm : PdWriteResYMMPair<WriteFAdd64Y,      [PdFPU0, PdFPFMA],  5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;

// FP compares on PdFPU0 + PdFPFMA; single and double precision use the same
// values. The Z forms are unsupported.
defm : PdWriteResXMMPair<WriteFCmp,         [PdFPU0, PdFPFMA],  2>;
defm : PdWriteResXMMPair<WriteFCmpX,        [PdFPU0, PdFPFMA],  2>;
defm : PdWriteResYMMPair<WriteFCmpY,        [PdFPU0, PdFPFMA],  2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;

defm : PdWriteResXMMPair<WriteFCmp64,       [PdFPU0, PdFPFMA],  2>;
defm : PdWriteResXMMPair<WriteFCmp64X,      [PdFPU0, PdFPFMA],  2>;
defm : PdWriteResYMMPair<WriteFCmp64Y,      [PdFPU0, PdFPFMA],  2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;

// Flag-setting FP compares additionally list the integer resource PdEX0.
// The empty cycle list leaves per-resource cycles at the multiclass default.
defm : PdWriteResXMMPair<WriteFCom,         [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
defm : PdWriteResXMMPair<WriteFComX,        [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;

// x87 compare with a memory operand: latency 6 on PdFPU1 + PdFPFMA.
def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 6;
}
def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>;

// TST_F and UCOM_FPPr: PdFPU1 + PdFPFMA with all SchedWriteRes defaults.
def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>;
def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;

// FP multiply (single precision) on PdFPU1 + PdFPFMA; 5 is the first numeric
// argument (presumably latency -- confirm against the PdWriteRes*Pair
// multiclasses). The Z form is unsupported.
defm : PdWriteResXMMPair<WriteFMul,         [PdFPU1, PdFPFMA],  5>;
defm : PdWriteResXMMPair<WriteFMulX,        [PdFPU1, PdFPFMA],  5>;
defm : PdWriteResYMMPair<WriteFMulY,        [PdFPU1, PdFPFMA],  5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;

// x87 multiply with a memory operand: latency 5, ReleaseAtCycles of 3 on
// PdLoad, 1 on PdFPU1 and 10 on PdFPFMA.
def PdWriteX87Mul: SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> {
  let Latency = 5;
  let ReleaseAtCycles = [3, 1, 10];
}
def : InstRW<[PdWriteX87Mul], (instrs MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>;

// FP multiply (double precision): same values as the single-precision writes.
defm : PdWriteResXMMPair<WriteFMul64,       [PdFPU1, PdFPFMA],  5>;
defm : PdWriteResXMMPair<WriteFMul64X,      [PdFPU1, PdFPFMA],  5>;
defm : PdWriteResYMMPair<WriteFMul64Y,      [PdFPU1, PdFPFMA],  5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;

// Fused multiply-add uses the PdFPU resource (not a specific pipe subset)
// together with PdFPFMA; scalar, X and Y forms share identical values.
defm : PdWriteResXMMPair<WriteFMA,          [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResXMMPair<WriteFMAX,         [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : PdWriteResYMMPair<WriteFMAY,         [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;

// Dot products on PdFPU1 + PdFPFMA.
// NOTE(review): the positional arguments after the resource list appear to be
// latency, per-resource cycles, micro-op count and an extra micro-op count for
// the folded-load form -- confirm against the PdWriteRes*Pair multiclasses.
defm : PdWriteResXMMPair<WriteDPPD,         [PdFPU1, PdFPFMA], 15, [1, 10], 15, 2>;

defm : PdWriteResXMMPair<WriteDPPS,         [PdFPU1, PdFPFMA], 25, [1, 14],  16, 2>;
defm : PdWriteResYMMPair<WriteDPPSY,        [PdFPU1, PdFPFMA], 27, [2, 25], /*or 29*/ 25, 4>;

// VEX-encoded 128-bit DPPS, register form: latency 27 and 17 micro-ops
// (versus 25 and 16 for WriteDPPS above), with the same ReleaseAtCycles.
def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
  let Latency = 27;
  let ReleaseAtCycles = [1, 14];
  let NumMicroOps = 17;
}
def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;

// Reciprocal estimate on PdFPU1 + PdFPFMA. The Z form is unsupported.
defm : PdWriteResXMMPair<WriteFRcp,         [PdFPU1, PdFPFMA],  5>;
defm : PdWriteResXMMPair<WriteFRcpX,        [PdFPU1, PdFPFMA],  5>;
defm : PdWriteResYMMPair<WriteFRcpY,        [PdFPU1, PdFPFMA],  5, [2, 1]>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;

// Reciprocal square-root estimate on PdFPU1 + PdFPFMA.
// NOTE(review): the scalar form passes an explicit cycle list [1, 2] while
// the X form relies on the default -- confirm the asymmetry is intended.
defm : PdWriteResXMMPair<WriteFRsqrt,       [PdFPU1, PdFPFMA],  5, [1, 2]>;
defm : PdWriteResXMMPair<WriteFRsqrtX,      [PdFPU1, PdFPFMA],  5>;
defm : PdWriteResYMMPair<WriteFRsqrtY,      [PdFPU1, PdFPFMA],  5, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;

// FP divide (single precision) on PdFPU1 + PdFPFMA. The Y form doubles both
// cycle entries of the 128-bit forms. The Z form is unsupported.
defm : PdWriteResXMMPair<WriteFDiv,         [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDivX,        [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDivY,        [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;

// x87 divide with a memory operand: latency 9, ReleaseAtCycles of 3 on
// PdLoad, 1 on PdFPU0 and 18 on PdFPFMA.
// NOTE(review): this uses PdFPU0 while the SSE/AVX divide writes in this
// block use PdFPU1 -- confirm that is intentional.
def PdWriteX87Div: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
  let Latency = 9;
  let ReleaseAtCycles = [3, 1, 18];
}
def : InstRW<[PdWriteX87Div], (instrs DIV_FI16m,  DIV_FI32m,
                                      DIVR_FI16m, DIVR_FI32m,
                                      DIV_F32m,   DIV_F64m,
                                      DIVR_F32m,  DIVR_F64m)>;

// FP divide (double precision): same values as single precision.
defm : PdWriteResXMMPair<WriteFDiv64,       [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFDiv64X,      [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFDiv64Y,      [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;

// FP square root (single precision): same values as the divide writes.
defm : PdWriteResXMMPair<WriteFSqrt,        [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrtX,       [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrtY,       [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;

// FP square root (double precision).
defm : PdWriteResXMMPair<WriteFSqrt64,      [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResXMMPair<WriteFSqrt64X,     [PdFPU1, PdFPFMA], 9, [1, 9]>;
defm : PdWriteResYMMPair<WriteFSqrt64Y,     [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;

// 80-bit square root and FP sign manipulation.
defm : PdWriteResXMMPair<WriteFSqrt80,      [PdFPU1, PdFPFMA],  1, [1, 18]>;
defm : PdWriteResXMMPair<WriteFSign,        [PdFPU1, PdFPFMA],  1, [1, 4]>;

// FP rounding on PdFPU1 + PdFPSTO. The empty cycle list on the XMM form
// leaves per-resource cycles at the multiclass default. Z is unsupported.
defm : PdWriteResXMMPair<WriteFRnd,         [PdFPU1, PdFPSTO],  4, []>;
defm : PdWriteResYMMPair<WriteFRndY,        [PdFPU1, PdFPSTO],  4, [2, 1], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;

// Per-instruction writes for VFRCZ*, all on PdFPU1 + PdFPSTO.

// Packed 128-bit, register source: latency 10, 2 micro-ops.
def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ReleaseAtCycles = [2, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZP], (instrs VFRCZPDrr, VFRCZPSrr)>;

// Scalar, register source: latency 10, 2 micro-ops, with a ReleaseAtCycles
// entry of 10 on PdFPU1.
def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ReleaseAtCycles = [10, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVFRCZS], (instrs VFRCZSDrr, VFRCZSSrr)>;

// 128-bit and scalar, memory source: latency 15, 3 micro-ops.
def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ReleaseAtCycles = [2, 1];
  let NumMicroOps = 3;
}
def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
                                      VFRCZSDrm, VFRCZSSrm)>;

// Packed 256-bit, register source: latency 10, 4 micro-ops.
def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 10;
  let ReleaseAtCycles = [3, 1];
  let NumMicroOps = 4;
}
def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;

// Packed 256-bit, memory source: latency 15, 8 micro-ops.
def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
  let Latency = 15;
  let ReleaseAtCycles = [4, 1];
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;

// FP logic ops on PdFPU23 + PdFPMAL. The Z form is unsupported.
defm : PdWriteResXMMPair<WriteFLogic,       [PdFPU23, PdFPMAL],  2>;
defm : PdWriteResYMMPair<WriteFLogicY,      [PdFPU23, PdFPMAL],  2, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;

// FP test ops additionally list the integer resource PdEX0. The XMM form is
// restricted to PdFPU0 while the YMM form uses PdFPU01.
defm : PdWriteResXMMPair<WriteFTest,        [PdFPU0, PdFPFMA, PdEX0],  1, [], 2>;
defm : PdWriteResYMMPair<WriteFTestY,       [PdFPU01, PdFPFMA, PdEX0], 1, [4, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;

// Fixed FP shuffles on PdFPU01 + PdFPFMA.
defm : PdWriteResXMMPair<WriteFShuffle,     [PdFPU01, PdFPFMA],  2, [1, 2]>;
defm : PdWriteResYMMPair<WriteFShuffleY,    [PdFPU01, PdFPFMA],  2, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;

// VBROADCASTF128 from memory: latency 7, 2 micro-ops.
def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 7;
  let ReleaseAtCycles = [1, 3];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128rm)>;

// Variable FP shuffles on PdFPU1 + PdFPXBR.
defm : PdWriteResXMMPair<WriteFVarShuffle,  [PdFPU1, PdFPXBR],  3>;
defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU1, PdFPXBR],  3, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;

// Fixed FP blends on PdFPU23 + PdFPMAL.
defm : PdWriteResXMMPair<WriteFBlend,       [PdFPU23, PdFPMAL],  2>;
defm : PdWriteResYMMPair<WriteFBlendY,      [PdFPU23, PdFPMAL],  2, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;

// Variable FP blends on PdFPU1 + PdFPXBR.
defm : PdWriteResXMMPair<WriteFVarBlend,    [PdFPU1, PdFPXBR],  2>;
defm : PdWriteResYMMPair<WriteFVarBlendY,   [PdFPU1, PdFPXBR],  2, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;

// 256-bit FP shuffles; the variable form is unsupported.
defm : PdWriteResXMMPair<WriteFShuffle256,  [PdFPU01, PdFPFMA],  2, [1, 3], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;

// Per-instruction writes for 128-bit lane extract/permute, all on
// PdFPU01 + PdFPFMA.

// VEXTRACTF128, register destination: latency 2, default micro-op count.
def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 2;
  let ReleaseAtCycles = [1, 2];
}
def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>;

// VEXTRACTF128, memory destination: latency 7, 2 micro-ops.
// NOTE(review): no PdStore resource is listed for this store form -- confirm
// that is intentional.
def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 7;
  let ReleaseAtCycles = [1, 4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>;

// VPERM2F128, register source: latency 4, 8 micro-ops.
def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 4;
  let ReleaseAtCycles = [1, 6];
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>;

// VPERM2F128, memory source: 10 micro-ops; latency is the register-form
// latency plus 4.
def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> {
  let Latency = 8; // 4 + 4
  let ReleaseAtCycles = [1, 8];
  let NumMicroOps = 10;
}
def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;

////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////

// Floating point -> integer conversions.
// NOTE(review): the positional arguments after the resource list appear to be
// latency, per-resource release cycles, micro-op count and (for the *Pair
// multiclasses) a micro-op adjustment for the load-folded form; confirm
// against the PdWriteRes* multiclass definitions earlier in this file.
defm : PdWriteResXMMPair<WriteCvtSS2I,   [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPS2I,   [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtPS2IY,  [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;

defm : PdWriteResXMMPair<WriteCvtSD2I,   [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;

defm : PdWriteResXMMPair<WriteCvtPD2I,   [PdFPU0, PdFPCVT, PdFPSTO],          8, [],        2>;
defm : PdWriteResYMMPair<WriteCvtPD2IY,  [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

// Scheduling override for MMX_CVTTPD2PIrr: 2 micro-ops, latency 6.
def PdWriteMMX_CVTTPD2PIrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTTPD2PIrr], (instrs MMX_CVTTPD2PIrr)>;

// Integer -> floating point conversions (scalar single, packed single,
// scalar double).
// FIXME: f+3 ST, LD+STC latency
defm : PdWriteResXMMPair<WriteCvtI2SS,   [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

defm : PdWriteResXMMPair<WriteCvtI2PS,   [PdFPU0, PdFPCVT, PdFPSTO], 4>;
defm : PdWriteResYMMPair<WriteCvtI2PSY,  [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;

defm : PdWriteResXMMPair<WriteCvtI2SD,   [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..

// Scheduling override shared by the register forms CVTSI642SDrr, CVTSI642SSrr,
// CVTSI2SDrr and CVTSI2SSrr: 2 micro-ops, latency 13.
// The ReleaseAtCycles entries pair positionally with [PdFPU0, PdFPCVT, PdFPSTO].
def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 13;
  let ReleaseAtCycles = [1, 3, 1];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;

// Integer -> packed double, and float <-> double conversions.
// The *Z write classes are marked unsupported in this model.
defm : PdWriteResXMMPair<WriteCvtI2PD,   [PdFPU0, PdFPCVT, PdFPSTO], 8, [],     2>;
defm : PdWriteResYMMPair<WriteCvtI2PDY,  [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;

defm : PdWriteResXMMPair<WriteCvtSS2SD,  [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPS2PD,  [PdFPU0, PdFPCVT, PdFPSTO], 8, [],     2>;
defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;

defm : PdWriteResXMMPair<WriteCvtSD2SS,  [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;

defm : PdWriteResXMMPair<WriteCvtPD2PS,  [PdFPU0, PdFPCVT, PdFPSTO],          8, [],        2>;
defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;

// Scheduling override shared by MMX_CVTPD2PIrr and MMX_CVTPI2PDrr:
// 2 micro-ops, latency 6.
def PdWriteMMX_CVTPD2PIrrMMX_CVTPI2PDrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTPD2PIrrMMX_CVTPI2PDrr], (instrs MMX_CVTPD2PIrr,
                                                            MMX_CVTPI2PDrr)>;

// Scheduling override for MMX_CVTPI2PSrr: 2 micro-ops, latency 4.
def PdWriteMMX_CVTPI2PSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTPI2PSrr], (instrs MMX_CVTPI2PSrr)>;

// Half-precision <-> single-precision conversions, including the
// store variants (WriteCvtPS2PH*St), which additionally list PdStore.
defm : PdWriteResXMMPair<WriteCvtPH2PS,  [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2, 1>;
defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 3>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;

defm : PdWriteRes<WriteCvtPS2PH,        [PdFPU0, PdFPCVT, PdFPSTO],          8, [1, 2, 1],    2>;
defm : PdWriteRes<WriteCvtPS2PHY,       [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;

defm : PdWriteRes<WriteCvtPS2PHSt,      [PdFPU0, PdFPCVT, PdFPSTO, PdStore],          4, [1, 2, 1, 1],    3>;
defm : PdWriteRes<WriteCvtPS2PHYSt,     [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdStore], 4, [1, 2, 1, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;

////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////

// Vector loads (plain, non-temporal, masked) and vector stores.
// NOTE(review): the positional arguments after the resource list appear to be
// latency, per-resource release cycles and micro-op count; confirm against
// the PdWriteRes multiclass definition earlier in this file.
defm : PdWriteRes<WriteVecLoad,             [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadX,            [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
defm : PdWriteRes<WriteVecLoadY,            [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 3], 2>;

defm : PdWriteRes<WriteVecLoadNT,           [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 4]>;
defm : PdWriteRes<WriteVecLoadNTY,          [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 4]>;

defm : PdWriteRes<WriteVecMaskedLoad,       [PdLoad, PdFPU01, PdFPMAL], 6, [3, 1, 2]>;
defm : PdWriteRes<WriteVecMaskedLoadY,      [PdLoad, PdFPU01, PdFPMAL], 6, [3, 2, 4], 2>;

defm : PdWriteRes<WriteVecStore,            [PdStore, PdFPU23, PdFPSTO], 2, [1, 3,  1]>;
defm : PdWriteRes<WriteVecStoreX,           [PdStore, PdFPU23, PdFPSTO], 1, [1, 3,  1]>;
// NOTE(review): the 36-cycle PdFPU23 entry is much larger than the
// neighbouring stores; presumably intentional, but worth confirming.
defm : PdWriteRes<WriteVecStoreY,           [PdStore, PdFPU23, PdFPSTO], 1, [2, 36, 2], 4>;

// Scheduling override for VMOVDQUYmr: 8 micro-ops; latency and release
// cycles are left at the SchedWriteRes defaults.
def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1,   PdFPSTO]> {
  let NumMicroOps = 8;
}
def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;

// Non-temporal vector stores, masked stores (unsupported in this model)
// and vector register moves.
defm : PdWriteRes<WriteVecStoreNT,          [PdStore, PdFPU1,   PdFPSTO], 2>;
defm : PdWriteRes<WriteVecStoreNTY,         [PdStore, PdFPU1,   PdFPSTO], 2, [2, 2, 2], 4>;

defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;

defm : PdWriteRes<WriteVecMove,             [PdFPU01, PdFPMAL], 2>;
defm : PdWriteRes<WriteVecMoveX,            [PdFPU01, PdFPMAL], 1, [1, 2]>;
defm : PdWriteRes<WriteVecMoveY,            [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
defm : X86WriteResUnsupported<WriteVecMoveZ>;

// Scheduling override for MOVDQArr. The body is intentionally empty, so all
// fields keep the SchedWriteRes defaults.
def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
}
def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>;

// Scheduling override for MMX_MOVQ2DQrr: latency 4.
def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>;

// Vector <-> GPR moves, vector ALU, shifts and integer multiplies.
// The Y and Z write classes in this group are marked unsupported.
defm : PdWriteRes<WriteVecMoveToGpr,        [PdFPU0, PdFPFMA, PdEX0], 11>;
defm : PdWriteRes<WriteVecMoveFromGpr,      [PdFPU01, PdFPFMA], 11, [1, 2], 2>;

defm : PdWriteResXMMPair<WriteVecALU,        [PdFPU23, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecALUX,       [PdFPU23, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;

defm : PdWriteResXMMPair<WriteVecShift,      [PdFPU1, PdFPXBR], 3>;
defm : PdWriteResXMMPair<WriteVecShiftX,     [PdFPU1, PdFPXBR], 3>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;

defm : PdWriteResXMMPair<WriteVecShiftImm,   [PdFPU1, PdFPXBR], 2>;
defm : PdWriteResXMMPair<WriteVecShiftImmX,  [PdFPU1, PdFPXBR], 2>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;

defm : PdWriteResXMMPair<WriteVecIMul,       [PdFPU0, PdFPMMA], 4>;
defm : PdWriteResXMMPair<WriteVecIMulX,      [PdFPU0, PdFPMMA], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;

defm : PdWriteResXMMPair<WritePMULLD,        [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL], 5, [2, 1, 2, 1]>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;

// Scheduling override for the listed VPMACS* register forms: latency 4.
def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPMMA, PdFPMAL]> {
  let Latency = 4;
}
def : InstRW<[PdWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
                                      VPMACSSDQLrr)>;

// MPSAD, PSADBW, PHMINPOS and shuffles.
// WriteMPSAD and WriteMPSADLd are defined separately rather than through a
// *Pair multiclass; the load form additionally lists PdLoad.
// FIXME: Investigate RR vs RM differences.
defm : PdWriteRes<WriteMPSAD,   [PdFPU0, PdFPMMA], 8, [1, 4], 8>;
defm : PdWriteRes<WriteMPSADLd, [PdFPU0, PdFPMMA, PdLoad], 14, [1, 4, 3],  8>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;

defm : PdWriteResXMMPair<WritePSADBW,        [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : PdWriteResXMMPair<WritePSADBWX,       [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;

defm : PdWriteResXMMPair<WritePHMINPOS,      [PdFPU0,  PdFPMAL], 4, [], 2>;

defm : PdWriteResXMMPair<WriteShuffle,       [PdFPU1, PdFPXBR], 2>;
defm : PdWriteResXMMPair<WriteShuffleX,      [PdFPU1, PdFPXBR], 2>;
defm : PdWriteResYMMPair<WriteShuffleY,      [PdFPU1, PdFPXBR], 2, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;

defm : PdWriteResXMMPair<WriteVarShuffle,    [PdFPU1, PdFPXBR], 3>;
defm : PdWriteResXMMPair<WriteVarShuffleX,   [PdFPU1, PdFPXBR], 3>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;

// Scheduling override for the register forms of VPPERM: latency 2.
// The ReleaseAtCycles entries pair positionally with [PdFPU1, PdFPXBR].
def PdWriteVPPERM : SchedWriteRes<[PdFPU1, PdFPXBR]> {
  let Latency = 2;
  let ReleaseAtCycles = [1, 1];
}
def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;

// Scheduling override for VPPERMrrm and VPPERMrmr: latency 7, with PdLoad
// added to the resource list.
// The ReleaseAtCycles entries pair positionally with [PdFPU1, PdFPXBR, PdLoad].
def PdWriteVPPERMLd : SchedWriteRes<[PdFPU1, PdFPXBR, PdLoad]> {
  let Latency = 7;
  let ReleaseAtCycles = [1, 1, 3];
}
def : InstRW<[PdWriteVPPERMLd], (instrs VPPERMrrm, VPPERMrmr)>;

// Blends, vector logic, vector test, 256-bit shuffle classes and variable
// vector shifts. Most Y and Z write classes here are marked unsupported.
defm : PdWriteResXMMPair<WriteBlend,         [PdFPU23, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;

defm : PdWriteResXMMPair<WriteVarBlend,      [PdFPU1, PdFPXBR], 2>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;

defm : PdWriteResXMMPair<WriteVecLogic,      [PdFPU23, PdFPMAL], 2>;
defm : PdWriteResXMMPair<WriteVecLogicX,     [PdFPU23, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;

defm : PdWriteResXMMPair<WriteVecTest,       [PdFPU0, PdFPFMA, PdEX0],  1, [], 2>;
defm : PdWriteResYMMPair<WriteVecTestY,      [PdFPU01, PdFPFMA, PdEX0], 1, [2, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;

// These three classes are given resources only; every other argument is
// left at the multiclass default.
defm : PdWriteResXMMPair<WriteShuffle256,    [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVPMOV256,      [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;

defm : PdWriteResXMMPair<WriteVarVecShift,   [PdFPU1, PdFPXBR], 3>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////

// The load (Ld) and store (St) forms are defined explicitly, each with its
// own resource list, instead of being derived through a *Pair multiclass.
defm : PdWriteRes<WriteVecInsert,    [PdFPU01, PdFPMAL], 2, [1, 3], 2>;
defm : PdWriteRes<WriteVecInsertLd,  [PdFPU01, PdFPMAL, PdLoad], 6, [1, 4, 3], 2>;

defm : PdWriteRes<WriteVecExtract,   [PdFPU0, PdFPFMA, PdEX0], 12, [1, 3, 1], 2>;
defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [2, 1, 1], 2>;

// Scheduling override for EXTRQ and EXTRQI: latency 3.
// The ReleaseAtCycles entries pair positionally with [PdFPU01, PdFPMAL].
def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 3];
}
def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;

////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////

// Implicit-length (IStr) and explicit-length (EStr) string compares; the
// explicit-length forms list six resources and 27 micro-ops.
defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 11, [1, 6, 1], 7, 1>;
defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0],  7, [1, 8, 1], 7, 2>;

defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 14, [1, 10, 10, 10, 1, 1], 27, 1>;
defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 10, 10, 10, 1, 1], 27, 1>;

////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////

// All three supported MOVMSK classes use the same resources
// [PdFPU0, PdFPFMA, PdEX0] and 2 micro-ops; only the latency differs.
defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0],   12, [], 2>;

defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
// defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;

defm : PdWriteRes<WriteMMXMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////

// All AES classes are modelled on [PdFPU0, PdFPMMA].
defm : PdWriteResXMMPair<WriteAESIMC,    [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESKeyGen, [PdFPU0, PdFPMMA], 5>;
defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteFHAdd,  [PdFPU0, PdFPFMA], 11, [1, 5],     3, 1>;
defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [1, 8], 8, 2>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;

defm : PdWriteResXMMPair<WritePHAdd,  [PdFPU01, PdFPMAL], 5, [1, 4], 3, 1>;
defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;

// Explicitly map the listed register-form integer horizontal add/sub
// instructions to WritePHAdd.
def : InstRW<[WritePHAdd], (instrs PHADDDrr, PHSUBDrr,
                                   PHADDWrr, PHSUBWrr,
                                   PHADDSWrr, PHSUBSWrr,
                                   VPHADDDrr, VPHSUBDrr,
                                   VPHADDWrr, VPHSUBWrr,
                                   VPHADDSWrr, VPHSUBSWrr)>;

// Likewise map the memory-operand forms to the load-folded WritePHAdd.Folded.
def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm,
                                          PHADDWrm, PHSUBWrm,
                                          PHADDSWrm, PHSUBSWrm,
                                          VPHADDDrm, VPHSUBDrm,
                                          VPHADDWrm, VPHSUBWrm,
                                          VPHADDSWrm, VPHSUBSWrm)>;

////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////

defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [1, 7], 5, 1>;

// Scheduling override for VPCLMULQDQrri: 6 micro-ops, latency 12.
// The ReleaseAtCycles entries pair positionally with [PdFPU0, PdFPMMA].
def PdWriteVPCLMULQDQrri : SchedWriteRes<[PdFPU0, PdFPMMA]> {
  let Latency = 12;
  let ReleaseAtCycles = [1, 7];
  let NumMicroOps = 6;
}
def : InstRW<[PdWriteVPCLMULQDQrri], (instrs VPCLMULQDQrri)>;

////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////

// Scheduling override for INSERTQ: latency 3.
// The ReleaseAtCycles entries pair positionally with [PdFPU01, PdFPMAL].
def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 2];
}
def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ)>;

// Scheduling override for INSERTQI: latency 3.
// The ReleaseAtCycles entries pair positionally with [PdFPU01, PdFPMAL].
def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 3];
}
def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;

////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////

// Scheduling override for VBROADCASTSDYrm and VBROADCASTSSYrm:
// 2 micro-ops, latency 6.
// The ReleaseAtCycles entries pair positionally with [PdLoad, PdFPU01, PdFPFMA].
def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
  let Latency = 6;
  let ReleaseAtCycles = [1, 2, 4];
  let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
                                                          VBROADCASTSSYrm)>;

// Scheduling override for VZEROALL: 32 micro-ops, latency 90, with an empty
// processor-resource list.
def PdWriteVZEROALL : SchedWriteRes<[]> {
  let Latency = 90;
  let NumMicroOps = 32;
}
def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>;

// Scheduling override for VZEROUPPER: 16 micro-ops, latency 46, with an empty
// processor-resource list.
def PdWriteVZEROUPPER : SchedWriteRes<[]> {
  let Latency = 46;
  let NumMicroOps = 16;
}
def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>;

///////////////////////////////////////////////////////////////////////////////
//  SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////

// Zero-latency write with no processor resources; selected by the zero-idiom
// variants in this section when ZeroIdiomPredicate matches.
def PdWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// GPR zero-idioms: use PdWriteZeroLatency when ZeroIdiomPredicate holds,
// otherwise fall back to WriteALU.
def PdWriteZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteALU]>
]>;
def : InstRW<[PdWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
                                         XOR32rr, XOR64rr)>;

// FP logic zero-idioms: use PdWriteZeroLatency when ZeroIdiomPredicate holds,
// otherwise fall back to WriteFLogic.
def PdWriteFZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteFLogic]>
]>;
def : InstRW<[PdWriteFZeroIdiom], (instrs XORPSrr,  VXORPSrr,
                                          XORPDrr,  VXORPDrr,
                                          ANDNPSrr, VANDNPSrr,
                                          ANDNPDrr, VANDNPDrr)>;

// VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have latency of 1.

// MMX logic zero-idioms: use PdWriteZeroLatency when ZeroIdiomPredicate holds,
// otherwise fall back to WriteVecLogic.
def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteVecLogic]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORrr, MMX_PANDNrr)>;

// XMM integer logic zero-idioms: use PdWriteZeroLatency when
// ZeroIdiomPredicate holds, otherwise fall back to WriteVecLogicX.
def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteVecLogicX]>
]>;
def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs PXORrr,  VPXORrr,
                                                PANDNrr, VPANDNrr)>;

// MMX ALU zero-idioms: use PdWriteZeroLatency when ZeroIdiomPredicate holds,
// otherwise fall back to WriteVecALU.
def PdWriteVZeroIdiomALU : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteVecALU]>
]>;
def : InstRW<[PdWriteVZeroIdiomALU], (instrs MMX_PSUBBrr,   MMX_PSUBDrr,
                                             MMX_PSUBQrr,   MMX_PSUBWrr,
                                             MMX_PCMPGTBrr,
                                             MMX_PCMPGTDrr,
                                             MMX_PCMPGTWrr)>;

// XMM ALU zero-idioms: use PdWriteZeroLatency when ZeroIdiomPredicate holds,
// otherwise fall back to WriteVecALUX.
def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
    SchedVar<MCSchedPredicate<TruePred>,           [WriteVecALUX]>
]>;
def : InstRW<[PdWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
                                              PSUBDrr, VPSUBDrr,
                                              PSUBQrr, VPSUBQrr,
                                              PSUBWrr, VPSUBWrr,
                                              PCMPGTBrr, VPCMPGTBrr,
                                              PCMPGTDrr, VPCMPGTDrr,
                                              PCMPGTWrr, VPCMPGTWrr)>;

///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// Note: VPCMPGTQrr is listed as a zero-idiom below, but PCMPGTQrr is not.

// Instructions treated as zero-idioms when ZeroIdiomPredicate holds,
// grouped by register class / ISA extension.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,

  // MMX Zero-idioms.
  DepBreakingClass<[
    MMX_PXORrr, MMX_PANDNrr, MMX_PSUBBrr,
    MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
    MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr,
    MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // SSE Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,

    // int variants.
    PXORrr, PANDNrr,
    PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
    PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
    PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
  ], ZeroIdiomPredicate>,

  // AVX Zero-idioms.
  DepBreakingClass<[
    // xmm fp variants.
    VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,

    // xmm int variants.
    VPXORrr, VPANDNrr,
    VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
    VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

    // ymm variants.
    VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
  ], ZeroIdiomPredicate>
]>;

// Dependency-breaking (but not zero-idiom) instructions. The CMP forms use
// CheckSameRegOperand<0, 1>; every other class uses ZeroIdiomPredicate.
def : IsDepBreakingFunction<[
  // GPR
  DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
  DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,

  // MMX
  DepBreakingClass<[
    MMX_PCMPEQBrr, MMX_PCMPEQDrr, MMX_PCMPEQWrr
  ], ZeroIdiomPredicate>,

  // SSE
  DepBreakingClass<[
    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
    // But not PCMPEQQrr.
  ], ZeroIdiomPredicate>,

  // AVX
  DepBreakingClass<[
    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
    // But not VPCMPEQQrr.
  ], ZeroIdiomPredicate>
]>;
14540b57cec5SDimitry Andric
14550b57cec5SDimitry Andric
14560b57cec5SDimitry Andric} // SchedModel
1457