xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver4.td (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
11ac55f4cSDimitry Andric//=- X86ScheduleZnver4.td - X86 Znver4 Scheduling ------------*- tablegen -*-=//
21ac55f4cSDimitry Andric//
31ac55f4cSDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
41ac55f4cSDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
51ac55f4cSDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
61ac55f4cSDimitry Andric//
71ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
81ac55f4cSDimitry Andric//
91ac55f4cSDimitry Andric// This file defines the machine model for Znver4 to support instruction
101ac55f4cSDimitry Andric// scheduling and other instruction cost heuristics.
111ac55f4cSDimitry Andric// Based on:
121ac55f4cSDimitry Andric//  * AMD Software Optimization Guide for AMD Family 19h Processors.
131ac55f4cSDimitry Andric//    https://www.amd.com/system/files/TechDocs/56665.zip
141ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
151ac55f4cSDimitry Andric
// Machine model record for Znver4: dispatch width, in-flight op capacity,
// loop-buffer size, load/store latencies, high-latency threshold and branch
// mispredict penalty. Fields introduced with `int` (VecLoadLatency,
// StoreLatency) are declared by this record itself and are read elsewhere in
// this file as Znver4Model.<field>; fields set with `let` override values
// inherited from SchedMachineModel.
161ac55f4cSDimitry Andricdef Znver4Model : SchedMachineModel {
171ac55f4cSDimitry Andric  // AMD SOG 19h, 2.9.6 Dispatch
181ac55f4cSDimitry Andric  // The processor may dispatch up to 6 macro ops per cycle
191ac55f4cSDimitry Andric  // into the execution engine.
201ac55f4cSDimitry Andric  let IssueWidth = 6;
211ac55f4cSDimitry Andric  // AMD SOG 19h, 2.10.3
221ac55f4cSDimitry Andric  // The retire control unit (RCU) tracks the completion status of all
231ac55f4cSDimitry Andric  // outstanding operations (integer, load/store, and floating-point) and is
241ac55f4cSDimitry Andric  // the final arbiter for exception processing and recovery.
251ac55f4cSDimitry Andric  // The unit can receive up to 6 macro ops dispatched per cycle and track up
261ac55f4cSDimitry Andric  // to 320 macro ops in-flight in non-SMT mode or 160 per thread in SMT mode.
271ac55f4cSDimitry Andric  let MicroOpBufferSize = 320;
281ac55f4cSDimitry Andric  // AMD SOG 19h, 2.9.1 Op Cache
291ac55f4cSDimitry Andric  // The op cache is organized as an associative cache with 64 sets and 8 ways.
301ac55f4cSDimitry Andric  // At each set-way intersection is an entry containing up to 8 macro ops.
31*0fca6ea1SDimitry Andric  // The maximum capacity of the op cache is 6.75K ops.
32*0fca6ea1SDimitry Andric  // Assuming a maximum dispatch of 9 ops/cy and a mispredict cost of 12cy from
33*0fca6ea1SDimitry Andric  // the op-cache, we limit the loop buffer to 9*12 = 108 to avoid loop
34*0fca6ea1SDimitry Andric  // unrolling leading to excessive filling of the op-cache from frontend.
35*0fca6ea1SDimitry Andric  let LoopMicroOpBufferSize = 108;
361ac55f4cSDimitry Andric  // AMD SOG 19h, 2.6.2 L1 Data Cache
371ac55f4cSDimitry Andric  // The L1 data cache has a 4- or 5- cycle integer load-to-use latency.
381ac55f4cSDimitry Andric  // AMD SOG 19h, 2.12 L1 Data Cache
391ac55f4cSDimitry Andric  // The AGU and LS pipelines are optimized for simple address generation modes.
401ac55f4cSDimitry Andric  // <...> and can achieve 4-cycle load-to-use integer load latency.
411ac55f4cSDimitry Andric  let LoadLatency = 4;
421ac55f4cSDimitry Andric  // AMD SOG 19h, 2.12 L1 Data Cache
431ac55f4cSDimitry Andric  // The AGU and LS pipelines are optimized for simple address generation modes.
441ac55f4cSDimitry Andric  // <...> and can achieve <...> 7-cycle load-to-use FP load latency.
451ac55f4cSDimitry Andric  int VecLoadLatency = 7;
461ac55f4cSDimitry Andric  // Latency of a simple store operation.
471ac55f4cSDimitry Andric  int StoreLatency = 1;
481ac55f4cSDimitry Andric  // FIXME: no SOG section is cited for the HighLatency threshold below.
491ac55f4cSDimitry Andric  let HighLatency = 25; // FIXME: any better choice?
501ac55f4cSDimitry Andric  // AMD SOG 19h, 2.8 Optimizing Branching
511ac55f4cSDimitry Andric  // The branch misprediction penalty is in the range from 11 to 18 cycles,
521ac55f4cSDimitry Andric  // <...>. The common case penalty is 13 cycles.
531ac55f4cSDimitry Andric  let MispredictPenalty = 13;
541ac55f4cSDimitry Andric
551ac55f4cSDimitry Andric  let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
561ac55f4cSDimitry Andric
  // Sets the CompleteModel flag inherited from SchedMachineModel
  // (see TargetSchedule.td for its exact meaning).
571ac55f4cSDimitry Andric  let CompleteModel = 1;
581ac55f4cSDimitry Andric}
591ac55f4cSDimitry Andric
601ac55f4cSDimitry Andriclet SchedModel = Znver4Model in {
611ac55f4cSDimitry Andric
621ac55f4cSDimitry Andric
631ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
641ac55f4cSDimitry Andric// RCU
651ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
661ac55f4cSDimitry Andric
671ac55f4cSDimitry Andric// AMD SOG 19h, 2.10.3 Retire Control Unit
681ac55f4cSDimitry Andric// The unit can receive up to 6 macro ops dispatched per cycle and track up to
691ac55f4cSDimitry Andric// 320 macro ops in-flight in non-SMT mode or 160 per thread in SMT mode. <...>
701ac55f4cSDimitry Andric// The retire unit handles in-order commit of up to nine macro ops per cycle.
// The first argument reuses Znver4Model.MicroOpBufferSize (320); the second
// argument (9) is the per-cycle retire limit quoted above.
711ac55f4cSDimitry Andricdef Zn4RCU : RetireControlUnit<Znver4Model.MicroOpBufferSize, 9>;
721ac55f4cSDimitry Andric
731ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
741ac55f4cSDimitry Andric// Integer Execution Unit
751ac55f4cSDimitry Andric//
761ac55f4cSDimitry Andric
771ac55f4cSDimitry Andric// AMD SOG 19h, 2.4 Superscalar Organization
781ac55f4cSDimitry Andric// The processor uses four decoupled independent integer scheduler queues,
791ac55f4cSDimitry Andric// each one servicing one ALU pipeline and one or two other pipelines
801ac55f4cSDimitry Andric
811ac55f4cSDimitry Andric//
821ac55f4cSDimitry Andric// Execution pipes
831ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
841ac55f4cSDimitry Andric
851ac55f4cSDimitry Andric// AMD SOG 19h, 2.10.2 Execution Units
861ac55f4cSDimitry Andric// The processor contains 4 general purpose integer execution pipes.
871ac55f4cSDimitry Andric// Each pipe has an ALU capable of general purpose integer operations.
// Each ALU pipe is declared as its own single-unit resource.
881ac55f4cSDimitry Andricdef Zn4ALU0 : ProcResource<1>;
891ac55f4cSDimitry Andricdef Zn4ALU1 : ProcResource<1>;
901ac55f4cSDimitry Andricdef Zn4ALU2 : ProcResource<1>;
911ac55f4cSDimitry Andricdef Zn4ALU3 : ProcResource<1>;
921ac55f4cSDimitry Andric
931ac55f4cSDimitry Andric// AMD SOG 19h, 2.10.2 Execution Units
941ac55f4cSDimitry Andric// There is also a separate branch execution unit.
// Only this dedicated branch unit gets its own resource; Zn4BRU0 is declared
// further down in this file as a defvar alias of Zn4ALU0.
951ac55f4cSDimitry Andricdef Zn4BRU1 : ProcResource<1>;
961ac55f4cSDimitry Andric
971ac55f4cSDimitry Andric// AMD SOG 19h, 2.10.2 Execution Units
981ac55f4cSDimitry Andric// There are three Address Generation Units (AGUs) for all load and store
991ac55f4cSDimitry Andric// address generation. There are also 3 store data movement units
1001ac55f4cSDimitry Andric// associated with the same schedulers as the AGUs.
// Only the three AGUs are declared here, one single-unit resource each.
1011ac55f4cSDimitry Andricdef Zn4AGU0 : ProcResource<1>;
1021ac55f4cSDimitry Andricdef Zn4AGU1 : ProcResource<1>;
1031ac55f4cSDimitry Andricdef Zn4AGU2 : ProcResource<1>;
1041ac55f4cSDimitry Andric
1051ac55f4cSDimitry Andric//
1061ac55f4cSDimitry Andric// Execution Units
1071ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
1081ac55f4cSDimitry Andric
// The three names below are defvar aliases, not new resources: anything that
// uses them consumes the underlying ALU pipe they are bound to.
1091ac55f4cSDimitry Andric// AMD SOG 19h, 2.10.2 Execution Units
1101ac55f4cSDimitry Andric// ALU0 additionally has divide <...> execution capability.
1111ac55f4cSDimitry Andricdefvar Zn4Divider = Zn4ALU0;
1121ac55f4cSDimitry Andric
1131ac55f4cSDimitry Andric// AMD SOG 19h, 2.10.2 Execution Units
1141ac55f4cSDimitry Andric// ALU0 additionally has <...> branch execution capability.
1151ac55f4cSDimitry Andricdefvar Zn4BRU0 = Zn4ALU0;
1161ac55f4cSDimitry Andric
1171ac55f4cSDimitry Andric// Integer Multiplication issued on ALU1.
1181ac55f4cSDimitry Andricdefvar Zn4Multiplier = Zn4ALU1;
1191ac55f4cSDimitry Andric
1201ac55f4cSDimitry Andric// Execution pipeline grouping
1211ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
1221ac55f4cSDimitry Andric
1231ac55f4cSDimitry Andric// General ALU operations
1241ac55f4cSDimitry Andricdef Zn4ALU0123 : ProcResGroup<[Zn4ALU0, Zn4ALU1, Zn4ALU2, Zn4ALU3]>;
1251ac55f4cSDimitry Andric
1261ac55f4cSDimitry Andric// General AGU operations
1271ac55f4cSDimitry Andricdef Zn4AGU012 : ProcResGroup<[Zn4AGU0, Zn4AGU1, Zn4AGU2]>;
1281ac55f4cSDimitry Andric
1291ac55f4cSDimitry Andric// Control flow: jumps, calls
// Zn4BRU0 is an alias of Zn4ALU0, so this group is effectively
// {Zn4ALU0, Zn4BRU1}.
1301ac55f4cSDimitry Andricdef Zn4BRU01 : ProcResGroup<[Zn4BRU0, Zn4BRU1]>;
1311ac55f4cSDimitry Andric
1321ac55f4cSDimitry Andric// Everything that isn't control flow, but still needs to access CC register,
1331ac55f4cSDimitry Andric// namely: conditional moves, SETcc.
1341ac55f4cSDimitry Andricdef Zn4ALU03 : ProcResGroup<[Zn4ALU0, Zn4ALU3]>;
1351ac55f4cSDimitry Andric
1361ac55f4cSDimitry Andric// Zn4ALU1 handles complex bit twiddling: CRC/PDEP/PEXT
// (no group is defined for that case; Zn4ALU1 is used directly).
1371ac55f4cSDimitry Andric
1381ac55f4cSDimitry Andric// Simple bit twiddling: bit test, shift/rotate, bit extraction
1391ac55f4cSDimitry Andricdef Zn4ALU12 : ProcResGroup<[Zn4ALU1, Zn4ALU2]>;
1401ac55f4cSDimitry Andric
1411ac55f4cSDimitry Andric
1421ac55f4cSDimitry Andric//
1431ac55f4cSDimitry Andric// Scheduling
1441ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
1451ac55f4cSDimitry Andric
1461ac55f4cSDimitry Andric// AMD SOG 19h, 2.10.3 Retire Control Unit
1471ac55f4cSDimitry Andric// The integer physical register file (PRF) consists of 224 registers.
// Arguments in order: register count (224), register classes (GR64, CCR),
// per-class cost list ([1, 1]), per-class move-elimination list ([1, 0]),
// then the two values annotated inline below. See the RegisterFile class in
// TargetSchedule.td for the exact meaning of each list.
1481ac55f4cSDimitry Andricdef Zn4IntegerPRF : RegisterFile<224, [GR64, CCR], [1, 1], [1, 0],
1491ac55f4cSDimitry Andric                              6,  // Max moves that can be eliminated per cycle.
1501ac55f4cSDimitry Andric                              0>; // Restrict move elimination to zero regs.
1511ac55f4cSDimitry Andric
1521ac55f4cSDimitry Andric// anandtech, The integer scheduler has a 4*24 entry macro op capacity.
1531ac55f4cSDimitry Andric// AMD SOG 19h, 2.10.1 Schedulers
1541ac55f4cSDimitry Andric// The schedulers can receive up to six macro ops per cycle, with a limit of
1551ac55f4cSDimitry Andric// two per scheduler. Each scheduler can issue one micro op per cycle into
1561ac55f4cSDimitry Andric// each of its associated pipelines.
// All four integer schedulers are modeled as one group with a single shared
// buffer of 4 * 24 = 96 entries. Zn4BRU0 is an alias of Zn4ALU0, so ALU0
// is named twice in the member list.
1571ac55f4cSDimitry Andricdef Zn4Int : ProcResGroup<[Zn4ALU0, Zn4AGU0, Zn4BRU0, // scheduler 0
1581ac55f4cSDimitry Andric                           Zn4ALU1, Zn4AGU1,          // scheduler 1
1591ac55f4cSDimitry Andric                           Zn4ALU2, Zn4AGU2,          // scheduler 2
1601ac55f4cSDimitry Andric                           Zn4ALU3,          Zn4BRU1  // scheduler 3
1611ac55f4cSDimitry Andric                          ]> {
1621ac55f4cSDimitry Andric  let BufferSize = !mul(4, 24);
1631ac55f4cSDimitry Andric}
1641ac55f4cSDimitry Andric
1651ac55f4cSDimitry Andric
1661ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
1671ac55f4cSDimitry Andric// Floating-Point Unit
1681ac55f4cSDimitry Andric//
1691ac55f4cSDimitry Andric
1701ac55f4cSDimitry Andric// AMD SOG 19h, 2.4 Superscalar Organization
1711ac55f4cSDimitry Andric// The processor uses <...> two decoupled independent floating point schedulers
1721ac55f4cSDimitry Andric// each servicing two FP pipelines and one store or FP-to-integer pipeline.
1731ac55f4cSDimitry Andric
1741ac55f4cSDimitry Andric//
1751ac55f4cSDimitry Andric// Execution pipes
1761ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
1771ac55f4cSDimitry Andric
1781ac55f4cSDimitry Andric// AMD SOG 19h, 2.10.1 Schedulers
1791ac55f4cSDimitry Andric// <...>, and six FPU pipes.
1801ac55f4cSDimitry Andric// Agner, 22.10 Floating point execution pipes
1811ac55f4cSDimitry Andric// There are six floating point/vector execution pipes.
// Pipes 0-3 are declared as individual single-unit resources; the remaining
// two pipes are declared together as one two-unit resource, Zn4FP45.
1821ac55f4cSDimitry Andricdef Zn4FP0  : ProcResource<1>;
1831ac55f4cSDimitry Andricdef Zn4FP1  : ProcResource<1>;
1841ac55f4cSDimitry Andricdef Zn4FP2  : ProcResource<1>;
1851ac55f4cSDimitry Andricdef Zn4FP3  : ProcResource<1>;
1861ac55f4cSDimitry Andricdef Zn4FP45 : ProcResource<2>;
1871ac55f4cSDimitry Andric
1881ac55f4cSDimitry Andric//
1891ac55f4cSDimitry Andric// Execution Units
1901ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
1911ac55f4cSDimitry Andric// AMD SOG 19h, 2.11.1 Floating Point Execution Resources
1921ac55f4cSDimitry Andric
// Every name in this section is a defvar alias onto one of Zn4FP0..Zn4FP3;
// no new resources are created. Several functional units therefore share the
// same pipe (for example FAdd and FCvt both map to FP2/FP3).
1931ac55f4cSDimitry Andric// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
1941ac55f4cSDimitry Andricdefvar Zn4FPFMul0 = Zn4FP0;
1951ac55f4cSDimitry Andricdefvar Zn4FPFMul1 = Zn4FP1;
1961ac55f4cSDimitry Andric
1971ac55f4cSDimitry Andric// (v)FADD*
1981ac55f4cSDimitry Andricdefvar Zn4FPFAdd0 = Zn4FP2;
1991ac55f4cSDimitry Andricdefvar Zn4FPFAdd1 = Zn4FP3;
2001ac55f4cSDimitry Andric
2011ac55f4cSDimitry Andric// All convert operations except pack/unpack
2021ac55f4cSDimitry Andricdefvar Zn4FPFCvt0 = Zn4FP2;
2031ac55f4cSDimitry Andricdefvar Zn4FPFCvt1 = Zn4FP3;
2041ac55f4cSDimitry Andric
2051ac55f4cSDimitry Andric// All Divide and Square Root except Reciprocal Approximation
2061ac55f4cSDimitry Andric// AMD SOG 19h, 2.11.1 Floating Point Execution Resources
2071ac55f4cSDimitry Andric// FDIV unit can support 2 simultaneous operations in flight
2081ac55f4cSDimitry Andric// even though it occupies a single pipe.
2091ac55f4cSDimitry Andric// FIXME: BufferSize=2 ?
2101ac55f4cSDimitry Andricdefvar Zn4FPFDiv = Zn4FP1;
2111ac55f4cSDimitry Andric
2121ac55f4cSDimitry Andric// Moves and Logical operations on Floating Point Data Types
2131ac55f4cSDimitry Andricdefvar Zn4FPFMisc0 = Zn4FP0;
2141ac55f4cSDimitry Andricdefvar Zn4FPFMisc1 = Zn4FP1;
2151ac55f4cSDimitry Andricdefvar Zn4FPFMisc2 = Zn4FP2;
2161ac55f4cSDimitry Andricdefvar Zn4FPFMisc3 = Zn4FP3;
2171ac55f4cSDimitry Andric
2181ac55f4cSDimitry Andric// Integer Adds, Subtracts, and Compares
2191ac55f4cSDimitry Andric// Some complex VADD operations are not available in all pipes.
2201ac55f4cSDimitry Andricdefvar Zn4FPVAdd0 = Zn4FP0;
2211ac55f4cSDimitry Andricdefvar Zn4FPVAdd1 = Zn4FP1;
2221ac55f4cSDimitry Andricdefvar Zn4FPVAdd2 = Zn4FP2;
2231ac55f4cSDimitry Andricdefvar Zn4FPVAdd3 = Zn4FP3;
2241ac55f4cSDimitry Andric
2251ac55f4cSDimitry Andric// Integer Multiplies, SAD, Blendvb
// Note the non-adjacent pipes: FP0 and FP3.
2261ac55f4cSDimitry Andricdefvar Zn4FPVMul0 = Zn4FP0;
2271ac55f4cSDimitry Andricdefvar Zn4FPVMul1 = Zn4FP3;
2281ac55f4cSDimitry Andric
2291ac55f4cSDimitry Andric// Data Shuffles, Packs, Unpacks, Permute
2301ac55f4cSDimitry Andric// Some complex shuffle operations are only available in pipe1.
// Zn4FPVShuf is that pipe (FP1); Zn4FPVShufAux is the second shuffle pipe (FP2).
2311ac55f4cSDimitry Andricdefvar Zn4FPVShuf = Zn4FP1;
2321ac55f4cSDimitry Andricdefvar Zn4FPVShufAux = Zn4FP2;
2331ac55f4cSDimitry Andric
2341ac55f4cSDimitry Andric// Bit Shift Left/Right operations
2351ac55f4cSDimitry Andricdefvar Zn4FPVShift0 = Zn4FP1;
2361ac55f4cSDimitry Andricdefvar Zn4FPVShift1 = Zn4FP2;
2371ac55f4cSDimitry Andric
2381ac55f4cSDimitry Andric// Moves and Logical operations on Packed Integer Data Types
2391ac55f4cSDimitry Andricdefvar Zn4FPVMisc0 = Zn4FP0;
2401ac55f4cSDimitry Andricdefvar Zn4FPVMisc1 = Zn4FP1;
2411ac55f4cSDimitry Andricdefvar Zn4FPVMisc2 = Zn4FP2;
2421ac55f4cSDimitry Andricdefvar Zn4FPVMisc3 = Zn4FP3;
2431ac55f4cSDimitry Andric
2441ac55f4cSDimitry Andric// *AES*
2451ac55f4cSDimitry Andricdefvar Zn4FPAES0 = Zn4FP0;
2461ac55f4cSDimitry Andricdefvar Zn4FPAES1 = Zn4FP1;
2471ac55f4cSDimitry Andric
2481ac55f4cSDimitry Andric// *CLM*
2491ac55f4cSDimitry Andricdefvar Zn4FPCLM0 = Zn4FP0;
2501ac55f4cSDimitry Andricdefvar Zn4FPCLM1 = Zn4FP1;
2511ac55f4cSDimitry Andric
2521ac55f4cSDimitry Andric// Execution pipeline grouping
2531ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
2541ac55f4cSDimitry Andric
2551ac55f4cSDimitry Andric// AMD SOG 19h, 2.11 Floating-Point Unit
2561ac55f4cSDimitry Andric// Stores and floating point to general purpose register transfer
2571ac55f4cSDimitry Andric// have 2 dedicated pipelines (pipe 5 and 6).
// Those dedicated pipelines are modeled separately by Zn4FP45. The group
// below covers only the four remaining FP pipes, Zn4FP0..Zn4FP3.
2581ac55f4cSDimitry Andricdef Zn4FPU0123 : ProcResGroup<[Zn4FP0, Zn4FP1, Zn4FP2, Zn4FP3]>;
2591ac55f4cSDimitry Andric
2601ac55f4cSDimitry Andric// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
2611ac55f4cSDimitry Andricdef Zn4FPFMul01 : ProcResGroup<[Zn4FPFMul0, Zn4FPFMul1]>;
2621ac55f4cSDimitry Andric
2631ac55f4cSDimitry Andric// (v)FADD*
2641ac55f4cSDimitry Andric// Some complex VADD operations are not available in all pipes.
2651ac55f4cSDimitry Andricdef Zn4FPFAdd01 : ProcResGroup<[Zn4FPFAdd0, Zn4FPFAdd1]>;
2661ac55f4cSDimitry Andric
2671ac55f4cSDimitry Andric// All convert operations except pack/unpack
// Zn4FPFCvt0/1 alias the same pipes as Zn4FPFAdd0/1 (FP2/FP3), so this group
// has the same members as Zn4FPFAdd01.
2681ac55f4cSDimitry Andricdef Zn4FPFCvt01 : ProcResGroup<[Zn4FPFCvt0, Zn4FPFCvt1]>;
2691ac55f4cSDimitry Andric
2701ac55f4cSDimitry Andric// All Divide and Square Root except Reciprocal Approximation
// Left commented out: Zn4FPFDiv is already a defvar alias of a single pipe.
2711ac55f4cSDimitry Andric// def Zn4FPFDiv : ProcResGroup<[Zn4FPFDiv]>;
2721ac55f4cSDimitry Andric
2731ac55f4cSDimitry Andric// Moves and Logical operations on Floating Point Data Types
2741ac55f4cSDimitry Andricdef Zn4FPFMisc0123 : ProcResGroup<[Zn4FPFMisc0, Zn4FPFMisc1, Zn4FPFMisc2, Zn4FPFMisc3]>;
2751ac55f4cSDimitry Andric
2761ac55f4cSDimitry Andric// FIXUP and RANGE use FP01 pipelines
2771ac55f4cSDimitry Andricdef Zn4FPFMisc01 : ProcResGroup<[Zn4FPFMisc0, Zn4FPFMisc1]>;
// Sub-group over FP1 and FP2.
2781ac55f4cSDimitry Andricdef Zn4FPFMisc12 : ProcResGroup<[Zn4FPFMisc1, Zn4FPFMisc2]>;
2791ac55f4cSDimitry Andric// SCALE instructions use FP23 pipelines
2801ac55f4cSDimitry Andricdef Zn4FPFMisc23 : ProcResGroup<[Zn4FPFMisc2, Zn4FPFMisc3]>;
// Sub-group over FP1, FP2 and FP3.
2811ac55f4cSDimitry Andricdef Zn4FPFMisc123 : ProcResGroup<[Zn4FPFMisc1,Zn4FPFMisc2, Zn4FPFMisc3]>;
2821ac55f4cSDimitry Andric
2831ac55f4cSDimitry Andric// Loads, Stores and Move to General Register (EX) Operations
2841ac55f4cSDimitry Andric// AMD SOG 19h, 2.11 Floating-Point Unit
2851ac55f4cSDimitry Andric// Stores and floating point to general purpose register transfer
2861ac55f4cSDimitry Andric// have 2 dedicated pipelines (pipe 5 and 6).
// Zn4FPLd01 is a defvar alias of the two-unit Zn4FP45 resource.
2871ac55f4cSDimitry Andricdefvar Zn4FPLd01 = Zn4FP45;
2881ac55f4cSDimitry Andric
2891ac55f4cSDimitry Andric// AMD SOG 19h, 2.11 Floating-Point Unit
2901ac55f4cSDimitry Andric// Note that FP stores are supported on two pipelines,
2911ac55f4cSDimitry Andric// but throughput is limited to one per cycle.
// Modeled as a single-unit resource whose Super is Zn4FP45: the one unit
// expresses the one-store-per-cycle limit quoted above.
2921ac55f4cSDimitry Andriclet Super = Zn4FP45 in
2931ac55f4cSDimitry Andricdef Zn4FPSt : ProcResource<1>;
2941ac55f4cSDimitry Andric
2951ac55f4cSDimitry Andric// Integer Adds, Subtracts, and Compares
2961ac55f4cSDimitry Andric// Some complex VADD operations are not available in all pipes.
2971ac55f4cSDimitry Andricdef Zn4FPVAdd0123 : ProcResGroup<[Zn4FPVAdd0, Zn4FPVAdd1, Zn4FPVAdd2, Zn4FPVAdd3]>;
2981ac55f4cSDimitry Andric
// Narrower VADD groups for operations restricted to two pipes.
2991ac55f4cSDimitry Andricdef Zn4FPVAdd01: ProcResGroup<[Zn4FPVAdd0, Zn4FPVAdd1]>;
3001ac55f4cSDimitry Andricdef Zn4FPVAdd12: ProcResGroup<[Zn4FPVAdd1, Zn4FPVAdd2]>;
3011ac55f4cSDimitry Andric
3021ac55f4cSDimitry Andric// AVX512 Opmask pipelines
// Despite the "01" suffix, Zn4FPOpMask01 is built from Zn4FP2 and Zn4FP3.
// Zn4FPOpMask4 wraps the two-unit Zn4FP45 resource.
3031ac55f4cSDimitry Andricdef Zn4FPOpMask01: ProcResGroup<[Zn4FP2, Zn4FP3]>;
3041ac55f4cSDimitry Andricdef Zn4FPOpMask4: ProcResGroup<[Zn4FP45]>;
3051ac55f4cSDimitry Andric
3061ac55f4cSDimitry Andric// Integer Multiplies, SAD, Blendvb
3071ac55f4cSDimitry Andricdef Zn4FPVMul01 : ProcResGroup<[Zn4FPVMul0, Zn4FPVMul1]>;
3081ac55f4cSDimitry Andric
3091ac55f4cSDimitry Andric// Data Shuffles, Packs, Unpacks, Permute
3101ac55f4cSDimitry Andric// Some complex shuffle operations are only available in pipe1.
3111ac55f4cSDimitry Andricdef Zn4FPVShuf01 : ProcResGroup<[Zn4FPVShuf, Zn4FPVShufAux]>;
3121ac55f4cSDimitry Andric
3131ac55f4cSDimitry Andric// Bit Shift Left/Right operations
3141ac55f4cSDimitry Andricdef Zn4FPVShift01 : ProcResGroup<[Zn4FPVShift0, Zn4FPVShift1]>;
3151ac55f4cSDimitry Andric
3161ac55f4cSDimitry Andric// Moves and Logical operations on Packed Integer Data Types
3171ac55f4cSDimitry Andricdef Zn4FPVMisc0123 : ProcResGroup<[Zn4FPVMisc0, Zn4FPVMisc1, Zn4FPVMisc2, Zn4FPVMisc3]>;
3181ac55f4cSDimitry Andric
3191ac55f4cSDimitry Andric// *AES*
3201ac55f4cSDimitry Andricdef Zn4FPAES01 : ProcResGroup<[Zn4FPAES0, Zn4FPAES1]>;
3211ac55f4cSDimitry Andric
3221ac55f4cSDimitry Andric// *CLM*
3231ac55f4cSDimitry Andricdef Zn4FPCLM01 : ProcResGroup<[Zn4FPCLM0, Zn4FPCLM1]>;
3241ac55f4cSDimitry Andric
3251ac55f4cSDimitry Andric
3261ac55f4cSDimitry Andric//
3271ac55f4cSDimitry Andric// Scheduling
3281ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
3291ac55f4cSDimitry Andric
3301ac55f4cSDimitry Andric// Agner, 21.8 Register renaming and out-of-order schedulers
3311ac55f4cSDimitry Andric// The floating point register file has 192 vector registers
3321ac55f4cSDimitry Andric// of 512b each in zen4.
// Arguments in order: register count (192), register classes, per-class cost
// list, per-class move-elimination list, then the two values annotated below.
// NOTE(review): four register classes are listed but the move-elimination
// list [0, 1, 1] has only three entries, so VR512 has no explicit entry.
// Confirm against the RegisterFile class in TargetSchedule.td that this is
// intended.
3331ac55f4cSDimitry Andricdef Zn4FpPRF : RegisterFile<192, [VR64, VR128, VR256, VR512], [1, 1, 1, 1], [0, 1, 1],
3341ac55f4cSDimitry Andric                            6,  // Max moves that can be eliminated per cycle.
3351ac55f4cSDimitry Andric                            0>; // Restrict move elimination to zero regs.
3361ac55f4cSDimitry Andric
3371ac55f4cSDimitry Andric// AMD SOG 19h, 2.11 Floating-Point Unit
3381ac55f4cSDimitry Andric// The floating-point scheduler has a 2*32 entry macro op capacity.
3391ac55f4cSDimitry Andric// AMD SOG 19h, 2.11 Floating-Point Unit
3401ac55f4cSDimitry Andric// <...> the scheduler can issue 1 micro op per cycle for each pipe.
3411ac55f4cSDimitry Andric// FIXME: those are two separate schedulers, not a single big one.
// Both schedulers are folded into one group with a shared buffer of
// 2 * 32 = 64 entries. The commented-out Zn4FP4/Zn4FP5 names mark where the
// two remaining pipes would sit; both are represented by the two-unit Zn4FP45.
3421ac55f4cSDimitry Andricdef Zn4FP : ProcResGroup<[Zn4FP0, Zn4FP2,          /*Zn4FP4,*/ // scheduler 0
3431ac55f4cSDimitry Andric                          Zn4FP1, Zn4FP3, Zn4FP45 /*Zn4FP5*/  // scheduler 1
3441ac55f4cSDimitry Andric                         ]> {
3451ac55f4cSDimitry Andric  let BufferSize = !mul(2, 32);
3461ac55f4cSDimitry Andric}
3471ac55f4cSDimitry Andric
3481ac55f4cSDimitry Andric// AMD SOG 19h, 2.11 Floating-Point Unit
3491ac55f4cSDimitry Andric// Macro ops can be dispatched to the 64 entry Non Scheduling Queue (NSQ)
3501ac55f4cSDimitry Andric// even if floating-point scheduler is full.
3511ac55f4cSDimitry Andric// FIXME: how to model this properly?
3521ac55f4cSDimitry Andric
3531ac55f4cSDimitry Andric
3541ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
3551ac55f4cSDimitry Andric// Load-Store Unit
3561ac55f4cSDimitry Andric//
3571ac55f4cSDimitry Andric
3581ac55f4cSDimitry Andric// AMD SOG 19h, 2.12 Load-Store Unit
3591ac55f4cSDimitry Andric// The LS unit contains three largely independent pipe-lines
3601ac55f4cSDimitry Andric// enabling the execution of three 256-bit memory operations per cycle.
// Parent resource with three units; Zn4Load and Zn4Store below both name it
// as their Super.
3611ac55f4cSDimitry Andricdef Zn4LSU : ProcResource<3>;
3621ac55f4cSDimitry Andric
3631ac55f4cSDimitry Andric// AMD SOG 19h, 2.12 Load-Store Unit
3641ac55f4cSDimitry Andric// All three memory operations can be loads.
3651ac55f4cSDimitry Andriclet Super = Zn4LSU in
3661ac55f4cSDimitry Andricdef Zn4Load : ProcResource<3> {
3671ac55f4cSDimitry Andric  // AMD SOG 19h, 2.12 Load-Store Unit
3681ac55f4cSDimitry Andric  // The LS unit can process up to 72 out-of-order loads.
3691ac55f4cSDimitry Andric  let BufferSize = 72;
3701ac55f4cSDimitry Andric}
3711ac55f4cSDimitry Andric
// Registers Zn4Load as the resource backing the load queue.
3721ac55f4cSDimitry Andricdef Zn4LoadQueue : LoadQueue<Zn4Load>;
3731ac55f4cSDimitry Andric
3741ac55f4cSDimitry Andric// AMD SOG 19h, 2.12 Load-Store Unit
3751ac55f4cSDimitry Andric// A maximum of two of the memory operations can be stores.
3761ac55f4cSDimitry Andriclet Super = Zn4LSU in
3771ac55f4cSDimitry Andricdef Zn4Store : ProcResource<2> {
3781ac55f4cSDimitry Andric  // AMD SOG 19h, 2.12 Load-Store Unit
3791ac55f4cSDimitry Andric  // The LS unit utilizes a 64-entry store queue (STQ).
3801ac55f4cSDimitry Andric  let BufferSize = 64;
3811ac55f4cSDimitry Andric}
3821ac55f4cSDimitry Andric
// Registers Zn4Store as the resource backing the store queue.
3831ac55f4cSDimitry Andricdef Zn4StoreQueue : StoreQueue<Zn4Store>;
3841ac55f4cSDimitry Andric
3851ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
3861ac55f4cSDimitry Andric// Basic helper classes.
3871ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
3881ac55f4cSDimitry Andric
3891ac55f4cSDimitry Andric// Many SchedWrites are defined in pairs with and without a folded load.
3901ac55f4cSDimitry Andric// Instructions with folded loads are usually micro-fused, so they only appear
3911ac55f4cSDimitry Andric// as two micro-ops when dispatched by the schedulers.
3921ac55f4cSDimitry Andric// This multiclass defines the resource usage for variants with and without
3931ac55f4cSDimitry Andric// folded loads.
3941ac55f4cSDimitry Andric
// Emits one anonymous WriteRes record for SchedRW.
//   SchedRW  - the SchedWrite being described.
//   ExePorts - resources consumed by the write.
//   Lat      - value assigned to Latency (default 1).
//   Res      - value assigned to ReleaseAtCycles (default: empty list).
//   UOps     - value assigned to NumMicroOps (default 1).
3951ac55f4cSDimitry Andricmulticlass __Zn4WriteRes<SchedWrite SchedRW, list<ProcResourceKind> ExePorts,
3961ac55f4cSDimitry Andric                         int Lat = 1, list<int> Res = [], int UOps = 1> {
3971ac55f4cSDimitry Andric  def : WriteRes<SchedRW, ExePorts> {
3981ac55f4cSDimitry Andric    let Latency = Lat;
3995f757f3fSDimitry Andric    let ReleaseAtCycles = Res;
4001ac55f4cSDimitry Andric    let NumMicroOps = UOps;
4011ac55f4cSDimitry Andric  }
4021ac55f4cSDimitry Andric}
4031ac55f4cSDimitry Andric
// Emits two WriteRes records: one for SchedRW itself and one for its
// folded-load form, SchedRW.Folded.
//   LoadLat  - cycles added to Lat for the folded form.
//   LoadUOps - micro-ops added to UOps for the folded form.
//   AGU      - address-generation resource prepended (with Zn4Load) to
//              ExePorts for the folded form.
//   LoadRes  - ReleaseAtCycles entry used for Zn4Load in the folded form.
4041ac55f4cSDimitry Andricmulticlass __Zn4WriteResPair<X86FoldableSchedWrite SchedRW,
4051ac55f4cSDimitry Andric                             list<ProcResourceKind> ExePorts, int Lat,
4061ac55f4cSDimitry Andric                             list<int> Res, int UOps, int LoadLat, int LoadUOps,
4071ac55f4cSDimitry Andric                             ProcResourceKind AGU, int LoadRes> {
  // Register (non-folded) form: parameters are passed through unchanged.
4081ac55f4cSDimitry Andric  defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
4091ac55f4cSDimitry Andric
  // Folded-load form. The ReleaseAtCycles list is:
  //   - empty when Res is empty and LoadRes == 1 (nothing to override);
  //   - otherwise [1, LoadRes] for [AGU, Zn4Load], followed by Res, or by
  //     one 1 per entry of ExePorts when Res is empty.
4101ac55f4cSDimitry Andric  defm : __Zn4WriteRes<SchedRW.Folded,
4111ac55f4cSDimitry Andric                       !listconcat([AGU, Zn4Load], ExePorts),
4121ac55f4cSDimitry Andric                       !add(Lat, LoadLat),
4131ac55f4cSDimitry Andric                       !if(!and(!empty(Res), !eq(LoadRes, 1)),
4141ac55f4cSDimitry Andric                         [],
4151ac55f4cSDimitry Andric                         !listconcat([1, LoadRes],
4161ac55f4cSDimitry Andric                           !if(!empty(Res),
4171ac55f4cSDimitry Andric                             !listsplat(1, !size(ExePorts)),
4181ac55f4cSDimitry Andric                             Res))),
4191ac55f4cSDimitry Andric                       !add(UOps, LoadUOps)>;
4201ac55f4cSDimitry Andric}
4211ac55f4cSDimitry Andric
4221ac55f4cSDimitry Andric// For classes without folded loads.
// Integer wrapper: forwards all arguments unchanged to __Zn4WriteRes.
4231ac55f4cSDimitry Andricmulticlass Zn4WriteResInt<SchedWrite SchedRW,
4241ac55f4cSDimitry Andric                          list<ProcResourceKind> ExePorts, int Lat = 1,
4251ac55f4cSDimitry Andric                          list<int> Res = [], int UOps = 1> {
4261ac55f4cSDimitry Andric  defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
4271ac55f4cSDimitry Andric}
4281ac55f4cSDimitry Andric
// XMM wrapper: forwards all arguments unchanged to __Zn4WriteRes. Its body
// is identical to Zn4WriteResInt; only the name differs.
4291ac55f4cSDimitry Andricmulticlass Zn4WriteResXMM<SchedWrite SchedRW,
4301ac55f4cSDimitry Andric                          list<ProcResourceKind> ExePorts, int Lat = 1,
4311ac55f4cSDimitry Andric                          list<int> Res = [], int UOps = 1> {
4321ac55f4cSDimitry Andric  defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
4331ac55f4cSDimitry Andric}
4341ac55f4cSDimitry Andric
// YMM wrapper: forwards all arguments unchanged to __Zn4WriteRes. Its body
// is identical to Zn4WriteResInt; only the name differs.
4351ac55f4cSDimitry Andricmulticlass Zn4WriteResYMM<SchedWrite SchedRW,
4361ac55f4cSDimitry Andric                          list<ProcResourceKind> ExePorts, int Lat = 1,
4371ac55f4cSDimitry Andric                          list<int> Res = [], int UOps = 1> {
4381ac55f4cSDimitry Andric  defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
4391ac55f4cSDimitry Andric}
4401ac55f4cSDimitry Andric
// ZMM wrapper: forwards all arguments unchanged to __Zn4WriteRes. UOps
// defaults to 1 here, whereas Zn4WriteResZMMPair defaults it to 2.
4411ac55f4cSDimitry Andricmulticlass Zn4WriteResZMM<SchedWrite SchedRW,
4421ac55f4cSDimitry Andric                          list<ProcResourceKind> ExePorts, int Lat = 1,
4431ac55f4cSDimitry Andric                          list<int> Res = [], int UOps = 1> {
4441ac55f4cSDimitry Andric  defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
4451ac55f4cSDimitry Andric}
4461ac55f4cSDimitry Andric
4471ac55f4cSDimitry Andric// For classes with folded loads.
// Integer register/folded-load pair. The folded form adds
// Znver4Model.LoadLatency to the latency and generates its address on
// Zn4AGU012.
4481ac55f4cSDimitry Andricmulticlass Zn4WriteResIntPair<X86FoldableSchedWrite SchedRW,
4491ac55f4cSDimitry Andric                              list<ProcResourceKind> ExePorts, int Lat = 1,
4501ac55f4cSDimitry Andric                              list<int> Res = [], int UOps = 1,
4511ac55f4cSDimitry Andric                              int LoadUOps = 0, int LoadRes = 1> {
4521ac55f4cSDimitry Andric  defm : __Zn4WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
4531ac55f4cSDimitry Andric                           Znver4Model.LoadLatency,
4541ac55f4cSDimitry Andric                           LoadUOps, Zn4AGU012, LoadRes>;
4551ac55f4cSDimitry Andric}
4561ac55f4cSDimitry Andric
// XMM register/folded-load pair. The folded form adds
// Znver4Model.VecLoadLatency to the latency and passes Zn4FPLd01 (an alias
// of Zn4FP45) in the AGU slot of __Zn4WriteResPair.
4571ac55f4cSDimitry Andricmulticlass Zn4WriteResXMMPair<X86FoldableSchedWrite SchedRW,
4581ac55f4cSDimitry Andric                              list<ProcResourceKind> ExePorts, int Lat = 1,
4591ac55f4cSDimitry Andric                              list<int> Res = [], int UOps = 1,
4601ac55f4cSDimitry Andric                              int LoadUOps = 0, int LoadRes = 1> {
4611ac55f4cSDimitry Andric  defm : __Zn4WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
4621ac55f4cSDimitry Andric                           Znver4Model.VecLoadLatency,
4631ac55f4cSDimitry Andric                           LoadUOps, Zn4FPLd01, LoadRes>;
4641ac55f4cSDimitry Andric}
4651ac55f4cSDimitry Andric
// YMM register/folded-load pair. Same body as Zn4WriteResXMMPair: the folded
// form adds Znver4Model.VecLoadLatency and passes Zn4FPLd01 in the AGU slot.
4661ac55f4cSDimitry Andricmulticlass Zn4WriteResYMMPair<X86FoldableSchedWrite SchedRW,
4671ac55f4cSDimitry Andric                              list<ProcResourceKind> ExePorts, int Lat = 1,
4681ac55f4cSDimitry Andric                              list<int> Res = [], int UOps = 1,
4691ac55f4cSDimitry Andric                              int LoadUOps = 0, int LoadRes = 1> {
4701ac55f4cSDimitry Andric  defm : __Zn4WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
4711ac55f4cSDimitry Andric                           Znver4Model.VecLoadLatency,
4721ac55f4cSDimitry Andric                           LoadUOps, Zn4FPLd01, LoadRes>;
4731ac55f4cSDimitry Andric}
4741ac55f4cSDimitry Andric
// ZMM register/folded-load pair. Same forwarding as the XMM/YMM pair
// multiclasses, except that UOps defaults to 2 rather than 1.
// NOTE(review): the non-pair Zn4WriteResZMM defaults UOps to 1; presumably
// the default of 2 here is deliberate for 512-bit operations. Confirm.
4751ac55f4cSDimitry Andricmulticlass Zn4WriteResZMMPair<X86FoldableSchedWrite SchedRW,
4761ac55f4cSDimitry Andric                              list<ProcResourceKind> ExePorts, int Lat = 1,
4771ac55f4cSDimitry Andric                              list<int> Res = [], int UOps = 2,
4781ac55f4cSDimitry Andric                              int LoadUOps = 0, int LoadRes = 1> {
4791ac55f4cSDimitry Andric  defm : __Zn4WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
4801ac55f4cSDimitry Andric                           Znver4Model.VecLoadLatency,
4811ac55f4cSDimitry Andric                           LoadUOps, Zn4FPLd01, LoadRes>;
4821ac55f4cSDimitry Andric}
4831ac55f4cSDimitry Andric
4841ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
4851ac55f4cSDimitry Andric// Here be dragons.
4861ac55f4cSDimitry Andric//===----------------------------------------------------------------------===//
4871ac55f4cSDimitry Andric
// Read advances for operands of folded-load instructions: the integer read
// uses the integer load latency (4); the vector reads use the vector load
// latency (7).
4881ac55f4cSDimitry Andricdef : ReadAdvance<ReadAfterLd, Znver4Model.LoadLatency>;
4891ac55f4cSDimitry Andric
4901ac55f4cSDimitry Andricdef : ReadAdvance<ReadAfterVecLd, Znver4Model.VecLoadLatency>;
4911ac55f4cSDimitry Andricdef : ReadAdvance<ReadAfterVecXLd, Znver4Model.VecLoadLatency>;
4921ac55f4cSDimitry Andricdef : ReadAdvance<ReadAfterVecYLd, Znver4Model.VecLoadLatency>;
4931ac55f4cSDimitry Andric
4941ac55f4cSDimitry Andric// AMD SOG 19h, 2.11 Floating-Point Unit
4951ac55f4cSDimitry Andric// There is 1 cycle of added latency for a result to cross
4961ac55f4cSDimitry Andric// from F to I or I to F domain.
// The advance is negative (-1) to express that added cycle.
4971ac55f4cSDimitry Andricdef : ReadAdvance<ReadInt2Fpu, -1>;
4981ac55f4cSDimitry Andric
// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
defm : Zn4WriteResInt<WriteRMW, [Zn4AGU012, Zn4Store],
                      Znver4Model.StoreLatency, [1, 1], 0>;

// Loads, stores, and moves, not folded with other operations.
defm : Zn4WriteResInt<WriteLoad, [Zn4AGU012, Zn4Load],
                      !add(Znver4Model.LoadLatency, 1), [1, 1], 1>;

// Model the effect of clobbering the read-write mask operand of the GATHER
// operation. Does not cost anything by itself (no resources, zero in the
// last argument); it only has latency, matching that of WriteLoad.
defm : Zn4WriteResInt<WriteVecMaskedGatherWriteback, [],
                      !add(Znver4Model.LoadLatency, 1), [], 0>;
5091ac55f4cSDimitry Andric
// Override for the 8/16-bit loads and 16-bit-destination extending loads
// listed below: same latency as WriteLoad (LoadLatency + 1) and a single
// micro-op, but Zn4AGU012 is held for 3 cycles instead of 1.
def Zn4WriteMOVSlow : SchedWriteRes<[Zn4AGU012, Zn4Load]> {
  let NumMicroOps = 1;
  let Latency = !add(Znver4Model.LoadLatency, 1);
  let ReleaseAtCycles = [3, 1];
}
def : InstRW<[Zn4WriteMOVSlow],
             (instrs MOV8rm, MOV8rm_NOREX, MOV16rm,
                     MOVSX16rm16, MOVSX16rm32, MOVZX16rm16,
                     MOVSX16rm8, MOVZX16rm8)>;
5161ac55f4cSDimitry Andric
// Plain and non-temporal integer stores share one description: AGU plus
// store resource, with the store resource held for 2 cycles.
defm : Zn4WriteResInt<WriteStore, [Zn4AGU012, Zn4Store],
                      Znver4Model.StoreLatency, [1, 2], 1>;
defm : Zn4WriteResInt<WriteStoreNT, [Zn4AGU012, Zn4Store],
                      Znver4Model.StoreLatency, [1, 2], 1>;

// Register-to-register move on any of the four ALUs.
defm : Zn4WriteResInt<WriteMove, [Zn4ALU0123], 1, [4], 1>;

// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
5231ac55f4cSDimitry Andric
// 16-bit MOVBE load: AGU + load + ALU, one micro-op, latency equal to the
// model's load latency.
def Zn4WriteMOVBE16rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
  let NumMicroOps = 1;
  let Latency = Znver4Model.LoadLatency;
  let ReleaseAtCycles = [1, 1, 4];
}
def : InstRW<[Zn4WriteMOVBE16rm], (instrs MOVBE16rm)>;

// MOVBE stores of every width: ALU + AGU + store, two micro-ops, latency
// equal to the model's store latency.
def Zn4WriteMOVBEmr : SchedWriteRes<[Zn4ALU0123, Zn4AGU012, Zn4Store]> {
  let NumMicroOps = 2;
  let Latency = Znver4Model.StoreLatency;
  let ReleaseAtCycles = [4, 1, 1];
}
def : InstRW<[Zn4WriteMOVBEmr],
             (instrs MOVBE16mr, MOVBE32mr, MOVBE64mr)>;
5371ac55f4cSDimitry Andric
// Arithmetic.

// Simple integer ALU op.
defm : Zn4WriteResIntPair<WriteALU, [Zn4ALU0123], 1, [1], 1>;

// Accumulator-with-immediate forms of ADD/AND/OR/SUB/XOR: same latency and
// micro-op count as WriteALU, but the ALU group is held for 4 cycles.
def Zn4WriteALUSlow : SchedWriteRes<[Zn4ALU0123]> {
  let NumMicroOps = 1;
  let Latency = 1;
  let ReleaseAtCycles = [4];
}
def : InstRW<[Zn4WriteALUSlow],
             (instrs ADD8i8, ADD16i16, ADD32i32, ADD64i32,
                     AND8i8, AND16i16, AND32i32, AND64i32,
                     OR8i8,  OR16i16,  OR32i32,  OR64i32,
                     SUB8i8, SUB16i16, SUB32i32, SUB64i32,
                     XOR8i8, XOR16i16, XOR32i32, XOR64i32)>;
5511ac55f4cSDimitry Andric
// Register-to-register sign/zero extensions with a 16-bit destination:
// one micro-op, latency 1, ALU group held for 4 cycles.
def Zn4WriteMoveExtend : SchedWriteRes<[Zn4ALU0123]> {
  let NumMicroOps = 1;
  let Latency = 1;
  let ReleaseAtCycles = [4];
}
def : InstRW<[Zn4WriteMoveExtend],
             (instrs MOVSX16rr16, MOVSX16rr32, MOVZX16rr16,
                     MOVSX16rr8, MOVZX16rr8)>;

// Materializing a 32-bit immediate into a register: one micro-op,
// latency 1, ALU group held for 2 cycles.
def Zn4WriteMaterialize32bitImm : SchedWriteRes<[Zn4ALU0123]> {
  let NumMicroOps = 1;
  let Latency = 1;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4WriteMaterialize32bitImm],
             (instrs MOV32ri, MOV32ri_alt, MOV64ri32)>;
5651ac55f4cSDimitry Andric
// Register forms of PDEP/PEXT: a single micro-op on Zn4ALU1, latency 3.
def Zn4WritePDEP_PEXT : SchedWriteRes<[Zn4ALU1]> {
  let NumMicroOps = 1;
  let Latency = 3;
  let ReleaseAtCycles = [1];
}
def : InstRW<[Zn4WritePDEP_PEXT],
             (instrs PDEP32rr, PDEP64rr, PEXT32rr, PEXT64rr)>;

// Integer ALU + flags op.
defm : Zn4WriteResIntPair<WriteADC, [Zn4ALU0123], 1, [4], 1>;

// 8-bit ADC/SBB with a memory destination: AGU + load + ALU + store,
// one micro-op, with the ALU group held for 7 cycles.
def Zn4WriteADC8mr_SBB8mr
    : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123, Zn4Store]> {
  let NumMicroOps = 1;
  let Latency = 1;
  let ReleaseAtCycles = [1, 1, 7, 1];
}
def : InstRW<[Zn4WriteADC8mr_SBB8mr], (instrs ADC8mr, SBB8mr)>;
5821ac55f4cSDimitry Andric
// This is for simple LEAs with one or two input operands.
// LEA instructions can't fold loads, hence the non-Pair helper.
defm : Zn4WriteResInt<WriteLEA, [Zn4AGU012], 1, [1], 1>;

// This write is used for slow LEA instructions: two micro-ops with
// latency 2, issued to the ALU group rather than the AGUs.
def Zn4Write3OpsLEA : SchedWriteRes<[Zn4ALU0123]> {
  let NumMicroOps = 2;
  let Latency = 2;
  let ReleaseAtCycles = [1];
}
5921ac55f4cSDimitry Andric
// On Znver4, a slow LEA is either a 3Ops LEA (base, index, offset),
// or an LEA with a `Scale` value different than 1.
def Zn4SlowLEAPredicate : MCSchedPredicate<CheckAny<[
  // A 3-operand LEA (base, index, offset).
  IsThreeOperandsLEAFn,
  // An LEA with a "Scale" different than 1: operand 2 is an immediate
  // and that immediate is not 1.
  CheckAll<[CheckIsImmOperand<2>, CheckNot<CheckImmOperand<2, 1>>]>
]>>;

// Pick the slow description when the predicate matches, otherwise fall
// back to the plain WriteLEA.
def Zn4WriteLEA : SchedWriteVariant<[
  SchedVar<Zn4SlowLEAPredicate, [Zn4Write3OpsLEA]>,
  SchedVar<NoSchedPred, [WriteLEA]>
]>;

def : InstRW<[Zn4WriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
6131ac55f4cSDimitry Andric
// 16-bit LEA always takes this fixed description (no variant selection):
// two micro-ops, ALU group held for 4 cycles.
def Zn4SlowLEA16r : SchedWriteRes<[Zn4ALU0123]> {
  let NumMicroOps = 2;
  let Latency = 2; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [4];
}

def : InstRW<[Zn4SlowLEA16r], (instrs LEA16r)>;
6211ac55f4cSDimitry Andric
// Integer multiplication
// All variants below execute on Zn4Multiplier.

// Integer 8-bit multiplication.
defm : Zn4WriteResIntPair<WriteIMul8, [Zn4Multiplier], 3, [3], 1>;

// Integer 16-bit multiplication, then by immediate, then by register.
defm : Zn4WriteResIntPair<WriteIMul16, [Zn4Multiplier], 3, [3], 3,
                          /*LoadUOps=*/1>;
defm : Zn4WriteResIntPair<WriteIMul16Imm, [Zn4Multiplier], 4, [4], 2>;
defm : Zn4WriteResIntPair<WriteIMul16Reg, [Zn4Multiplier], 3, [1], 1>;

// Integer 32-bit multiplication, 32-bit Unsigned Multiply Without Affecting
// Flags (MULX), then by immediate, then by register.
defm : Zn4WriteResIntPair<WriteIMul32, [Zn4Multiplier], 3, [3], 2>;
defm : Zn4WriteResIntPair<WriteMULX32, [Zn4Multiplier], 3, [1], 2>;
defm : Zn4WriteResIntPair<WriteIMul32Imm, [Zn4Multiplier], 3, [1], 1>;
defm : Zn4WriteResIntPair<WriteIMul32Reg, [Zn4Multiplier], 3, [1], 1>;

// Integer 64-bit multiplication, 64-bit Unsigned Multiply Without Affecting
// Flags (MULX), then by immediate, then by register.
defm : Zn4WriteResIntPair<WriteIMul64, [Zn4Multiplier], 3, [3], 2>;
defm : Zn4WriteResIntPair<WriteMULX64, [Zn4Multiplier], 3, [1], 2>;
defm : Zn4WriteResIntPair<WriteIMul64Imm, [Zn4Multiplier], 3, [1], 1>;
defm : Zn4WriteResIntPair<WriteIMul64Reg, [Zn4Multiplier], 3, [1], 1>;

// Integer multiplication, high part. Latency only, no resources; the
// load-folded form adds the model's load latency.
defm : Zn4WriteResInt<WriteIMulHLd, [],
                      !add(4, Znver4Model.LoadLatency), [], 0>;
defm : Zn4WriteResInt<WriteIMulH, [], 4, [], 0>;
6371ac55f4cSDimitry Andric
// Byte Order (Endianness) Swap, 32-bit and 64-bit: identical descriptions.
defm : Zn4WriteResInt<WriteBSWAP32, [Zn4ALU0123], 1, [1], 1>;
defm : Zn4WriteResInt<WriteBSWAP64, [Zn4ALU0123], 1, [1], 1>;
6401ac55f4cSDimitry Andric
// Compare and set, compare and swap.
defm : Zn4WriteResIntPair<WriteCMPXCHG, [Zn4ALU0123], 3, [12], 5>;

// 8-bit register form: 3 micro-ops instead of the generic 5.
def Zn4WriteCMPXCHG8rr : SchedWriteRes<[Zn4ALU0123]> {
  let NumMicroOps = 3;
  let Latency = 3;
  let ReleaseAtCycles = [12];
}
def : InstRW<[Zn4WriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;

// Compare and set, compare and swap (read-modify-write form).
defm : Zn4WriteResInt<WriteCMPXCHGRMW, [Zn4ALU0123], 3, [12], 6>;

// 8-bit memory forms, plain and LOCK-prefixed. Latency and micro-op count
// are derived from the register form: load latency is added to the former
// and 2 micro-ops to the latter.
def Zn4WriteCMPXCHG8rm_LCMPXCHG8
    : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
  let NumMicroOps = !add(Zn4WriteCMPXCHG8rr.NumMicroOps, 2);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteCMPXCHG8rr.Latency);
  let ReleaseAtCycles = [1, 1, 12];
}
def : InstRW<[Zn4WriteCMPXCHG8rm_LCMPXCHG8],
             (instrs CMPXCHG8rm, LCMPXCHG8)>;

// CMPXCHG8B: 19 micro-ops.
def Zn4WriteCMPXCHG8B : SchedWriteRes<[Zn4ALU0123]> {
  let NumMicroOps = 19;
  let Latency = 3; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [24];
}
def : InstRW<[Zn4WriteCMPXCHG8B], (instrs CMPXCHG8B)>;

// CMPXCHG16B, plain and LOCK-prefixed: 28 micro-ops.
def Zn4WriteCMPXCHG16B_LCMPXCHG16B : SchedWriteRes<[Zn4ALU0123]> {
  let NumMicroOps = 28;
  let Latency = 4; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [59];
}
def : InstRW<[Zn4WriteCMPXCHG16B_LCMPXCHG16B],
             (instrs CMPXCHG16B, LCMPXCHG16B)>;
6721ac55f4cSDimitry Andric
// 8/16-bit register exchanges (the "unrenameable" ones, per the record
// name): two micro-ops, latency 1.
def Zn4WriteWriteXCHGUnrenameable : SchedWriteRes<[Zn4ALU0123]> {
  let NumMicroOps = 2;
  let Latency = 1;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4WriteWriteXCHGUnrenameable],
             (instrs XCHG8rr, XCHG16rr, XCHG16ar)>;

// 8/16-bit exchange with memory: 5 micro-ops, load latency + 3.
def Zn4WriteXCHG8rm_XCHG16rm
    : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
  let NumMicroOps = 5;
  let Latency = !add(Znver4Model.LoadLatency, 3); // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[Zn4WriteXCHG8rm_XCHG16rm], (instrs XCHG8rm, XCHG16rm)>;

// 32/64-bit exchange with memory: 2 micro-ops, load latency + 2.
def Zn4WriteXCHG32rm_XCHG64rm
    : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
  let NumMicroOps = 2;
  let Latency = !add(Znver4Model.LoadLatency, 2); // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[Zn4WriteXCHG32rm_XCHG64rm], (instrs XCHG32rm, XCHG64rm)>;
6931ac55f4cSDimitry Andric
// Integer division.
// FIXME: uops for 8-bit division measures as 2. for others it's a guess.
// FIXME: latency for 8-bit division measures as 10. for others it's a guess.
// Unsigned and signed division use identical numbers at each width, so the
// two are listed side by side. Everything runs on Zn4Divider.

// 8-bit.
defm : Zn4WriteResIntPair<WriteDiv8,  [Zn4Divider], 10, [10], 2>;
defm : Zn4WriteResIntPair<WriteIDiv8, [Zn4Divider], 10, [10], 2>;

// 16-bit.
defm : Zn4WriteResIntPair<WriteDiv16,  [Zn4Divider], 11, [11], 2>;
defm : Zn4WriteResIntPair<WriteIDiv16, [Zn4Divider], 11, [11], 2>;

// 32-bit.
defm : Zn4WriteResIntPair<WriteDiv32,  [Zn4Divider], 13, [13], 2>;
defm : Zn4WriteResIntPair<WriteIDiv32, [Zn4Divider], 13, [13], 2>;

// 64-bit.
defm : Zn4WriteResIntPair<WriteDiv64,  [Zn4Divider], 17, [17], 2>;
defm : Zn4WriteResIntPair<WriteIDiv64, [Zn4Divider], 17, [17], 2>;
7051ac55f4cSDimitry Andric
// Bit scan forward / bit scan reverse: both on Zn4ALU1 with 6 micro-ops.
defm : Zn4WriteResIntPair<WriteBSF, [Zn4ALU1], 1, [1], 6, /*LoadUOps=*/1>;
defm : Zn4WriteResIntPair<WriteBSR, [Zn4ALU1], 1, [1], 6, /*LoadUOps=*/1>;

// Bit population count.
defm : Zn4WriteResIntPair<WritePOPCNT, [Zn4ALU0123], 1, [1], 1>;

// 16-bit register POPCNT: ALU group held for 4 cycles instead of 1.
def Zn4WritePOPCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
  let NumMicroOps = 1;
  let Latency = 1;
  let ReleaseAtCycles = [4];
}
def : InstRW<[Zn4WritePOPCNT16rr], (instrs POPCNT16rr)>;

// Leading zero count.
defm : Zn4WriteResIntPair<WriteLZCNT, [Zn4ALU0123], 1, [1], 1>;

// 16-bit register LZCNT: ALU group held for 4 cycles instead of 1.
def Zn4WriteLZCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
  let NumMicroOps = 1;
  let Latency = 1;
  let ReleaseAtCycles = [4];
}
def : InstRW<[Zn4WriteLZCNT16rr], (instrs LZCNT16rr)>;

// Trailing zero count.
defm : Zn4WriteResIntPair<WriteTZCNT, [Zn4ALU12], 2, [1], 2>;

// 16-bit register TZCNT: uses the full ALU group (not only Zn4ALU12),
// held for 4 cycles.
def Zn4WriteTZCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
  let NumMicroOps = 2;
  let Latency = 2;
  let ReleaseAtCycles = [4];
}
def : InstRW<[Zn4WriteTZCNT16rr], (instrs TZCNT16rr)>;
7351ac55f4cSDimitry Andric
// Conditional move.
defm : Zn4WriteResIntPair<WriteCMOV, [Zn4ALU03], 1, [1], 1>;

// X87 conditional move.
// FIXME: not from llvm-exegesis
defm : Zn4WriteResInt<WriteFCMOV, [Zn4ALU0123], 7, [28], 7>;

// Set register based on condition code.
defm : Zn4WriteResInt<WriteSETCC, [Zn4ALU03], 1, [2], 1>;

// Store form of the above.
// FIXME: latency not from llvm-exegesis
defm : Zn4WriteResInt<WriteSETCCStore, [Zn4ALU03, Zn4AGU012, Zn4Store],
                      2, [2, 1, 1], 2>;

// Load/Store flags in AH.
defm : Zn4WriteResInt<WriteLAHFSAHF, [Zn4ALU3], 1, [1], 1>;

// Bit Test. The two load forms differ only in micro-op count (2 vs 7).
defm : Zn4WriteResInt<WriteBitTest, [Zn4ALU12], 1, [1], 1>;
defm : Zn4WriteResInt<WriteBitTestImmLd, [Zn4AGU012, Zn4Load, Zn4ALU12],
                      !add(Znver4Model.LoadLatency, 1), [1, 1, 1], 2>;
defm : Zn4WriteResInt<WriteBitTestRegLd, [Zn4AGU012, Zn4Load, Zn4ALU12],
                      !add(Znver4Model.LoadLatency, 1), [1, 1, 1], 7>;

// Bit Test + Set. The two load forms differ only in micro-op count (4 vs 9).
defm : Zn4WriteResInt<WriteBitTestSet, [Zn4ALU12], 2, [2], 2>;
defm : Zn4WriteResInt<WriteBitTestSetImmLd, [Zn4AGU012, Zn4Load, Zn4ALU12],
                      !add(Znver4Model.LoadLatency, 2), [1, 1, 1], 4>;
defm : Zn4WriteResInt<WriteBitTestSetRegLd, [Zn4AGU012, Zn4Load, Zn4ALU12],
                      !add(Znver4Model.LoadLatency, 2), [1, 1, 1], 9>;
7491ac55f4cSDimitry Andric
// Integer shifts and rotates.
// Shift, shift-by-CL and rotate share one description on Zn4ALU12.
defm : Zn4WriteResIntPair<WriteShift, [Zn4ALU12], 1, [1], 1,
                          /*LoadUOps=*/1>;
defm : Zn4WriteResIntPair<WriteShiftCL, [Zn4ALU12], 1, [1], 1,
                          /*LoadUOps=*/1>;
defm : Zn4WriteResIntPair<WriteRotate, [Zn4ALU12], 1, [1], 1,
                          /*LoadUOps=*/1>;

// RCL/RCR by one on a register: one micro-op, Zn4ALU12 held for 2 cycles.
def Zn4WriteRotateR1 : SchedWriteRes<[Zn4ALU12]> {
  let NumMicroOps = 1;
  let Latency = 1;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4WriteRotateR1],
             (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
                     RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;

// RCL/RCR by one on memory: derived from the register form by adding the
// load latency and one micro-op.
def Zn4WriteRotateM1 : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
  let NumMicroOps = !add(Zn4WriteRotateR1.NumMicroOps, 1);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateR1.Latency);
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[Zn4WriteRotateM1],
             (instrs RCL8m1, RCL16m1, RCL32m1, RCL64m1,
                     RCR8m1, RCR16m1, RCR32m1, RCR64m1)>;
7701ac55f4cSDimitry Andric
// RCR by immediate on a register: 7 micro-ops, latency 3.
def Zn4WriteRotateRightRI : SchedWriteRes<[Zn4ALU12]> {
  let NumMicroOps = 7;
  let Latency = 3;
  let ReleaseAtCycles = [6];
}
def : InstRW<[Zn4WriteRotateRightRI],
             (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;

// RCR by immediate on memory: register form plus the load latency and
// 3 more micro-ops.
def Zn4WriteRotateRightMI
    : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
  let NumMicroOps = !add(Zn4WriteRotateRightRI.NumMicroOps, 3);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateRightRI.Latency);
  let ReleaseAtCycles = [1, 1, 8];
}
def : InstRW<[Zn4WriteRotateRightMI],
             (instrs RCR8mi, RCR16mi, RCR32mi, RCR64mi)>;

// RCL by immediate on a register: 9 micro-ops, latency 4.
def Zn4WriteRotateLeftRI : SchedWriteRes<[Zn4ALU12]> {
  let NumMicroOps = 9;
  let Latency = 4;
  let ReleaseAtCycles = [8];
}
def : InstRW<[Zn4WriteRotateLeftRI],
             (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;

// RCL by immediate on memory: register form plus the load latency and
// 2 more micro-ops.
def Zn4WriteRotateLeftMI
    : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
  let NumMicroOps = !add(Zn4WriteRotateLeftRI.NumMicroOps, 2);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateLeftRI.Latency);
  let ReleaseAtCycles = [1, 1, 8];
}
def : InstRW<[Zn4WriteRotateLeftMI],
             (instrs RCL8mi, RCL16mi, RCL32mi, RCL64mi)>;
7981ac55f4cSDimitry Andric
// Rotate by CL.
defm : Zn4WriteResIntPair<WriteRotateCL, [Zn4ALU12], 1, [1], 1,
                          /*LoadUOps=*/1>;

// RCR by CL on a register: 7 micro-ops, latency 3.
def Zn4WriteRotateRightRCL : SchedWriteRes<[Zn4ALU12]> {
  let NumMicroOps = 7;
  let Latency = 3;
  let ReleaseAtCycles = [6];
}
def : InstRW<[Zn4WriteRotateRightRCL],
             (instrs RCR8rCL, RCR16rCL, RCR32rCL, RCR64rCL)>;

// RCR by CL on memory: register form plus the load latency and
// 2 more micro-ops.
def Zn4WriteRotateRightMCL
    : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
  let NumMicroOps = !add(Zn4WriteRotateRightRCL.NumMicroOps, 2);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateRightRCL.Latency);
  let ReleaseAtCycles = [1, 1, 8];
}
def : InstRW<[Zn4WriteRotateRightMCL],
             (instrs RCR8mCL, RCR16mCL, RCR32mCL, RCR64mCL)>;

// RCL by CL on a register: 9 micro-ops, latency 4.
def Zn4WriteRotateLeftRCL : SchedWriteRes<[Zn4ALU12]> {
  let NumMicroOps = 9;
  let Latency = 4;
  let ReleaseAtCycles = [8];
}
def : InstRW<[Zn4WriteRotateLeftRCL],
             (instrs RCL8rCL, RCL16rCL, RCL32rCL, RCL64rCL)>;

// RCL by CL on memory: register form plus the load latency and
// 2 more micro-ops.
def Zn4WriteRotateLeftMCL
    : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
  let NumMicroOps = !add(Zn4WriteRotateLeftRCL.NumMicroOps, 2);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateLeftRCL.Latency);
  let ReleaseAtCycles = [1, 1, 8];
}
def : InstRW<[Zn4WriteRotateLeftMCL],
             (instrs RCL8mCL, RCL16mCL, RCL32mCL, RCL64mCL)>;
8281ac55f4cSDimitry Andric
// Double shift instructions.
// Register forms (by immediate, by CL) differ only in micro-op count.
defm : Zn4WriteResInt<WriteSHDrri,  [Zn4ALU12], 2, [3], 4>;
defm : Zn4WriteResInt<WriteSHDrrcl, [Zn4ALU12], 2, [3], 5>;
// Memory forms (by immediate, by CL) share one description.
defm : Zn4WriteResInt<WriteSHDmri, [Zn4AGU012, Zn4Load, Zn4ALU12],
                      !add(Znver4Model.LoadLatency, 2), [1, 1, 4], 6>;
defm : Zn4WriteResInt<WriteSHDmrcl, [Zn4AGU012, Zn4Load, Zn4ALU12],
                      !add(Znver4Model.LoadLatency, 2), [1, 1, 4], 6>;

// BMI1 BEXTR/BLS, BMI2 BZHI
// BLS may use any ALU; BEXTR and BZHI are limited to Zn4ALU12.
defm : Zn4WriteResIntPair<WriteBEXTR, [Zn4ALU12], 1, [1], 1,
                          /*LoadUOps=*/1>;
defm : Zn4WriteResIntPair<WriteBLS, [Zn4ALU0123], 1, [1], 1,
                          /*LoadUOps=*/1>;
defm : Zn4WriteResIntPair<WriteBZHI, [Zn4ALU12], 1, [1], 1,
                          /*LoadUOps=*/1>;
8391ac55f4cSDimitry Andric
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely, so the write is given
// zero latency and zero cycles on the ALU group.
defm : Zn4WriteResInt<WriteZero, [Zn4ALU0123], 0, [0], 1>;

// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
// FIXME: not from llvm-exegesis
defm : Zn4WriteResIntPair<WriteJump, [Zn4BRU01], 1, [1], 1>;
8471ac55f4cSDimitry Andric
// Floating point. This covers both scalar and vector operations.

// x87 constant loads: FP load port + load + Zn4FP1, with 4 (FLD0) or
// 7 (FLD1, FLDC) cycles on top of the model's load latency.
defm : Zn4WriteResInt<WriteFLD0, [Zn4FPLd01, Zn4Load, Zn4FP1],
                      !add(Znver4Model.LoadLatency, 4), [1, 1, 1], 1>;
defm : Zn4WriteResInt<WriteFLD1, [Zn4FPLd01, Zn4Load, Zn4FP1],
                      !add(Znver4Model.LoadLatency, 7), [1, 1, 1], 1>;
defm : Zn4WriteResInt<WriteFLDC, [Zn4FPLd01, Zn4Load, Zn4FP1],
                      !add(Znver4Model.LoadLatency, 7), [1, 1, 1], 1>;

// Plain and masked FP loads: vector load latency + 1 for every width.
defm : Zn4WriteResXMM<WriteFLoad, [Zn4FPLd01, Zn4Load],
                      !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResXMM<WriteFLoadX, [Zn4FPLd01, Zn4Load],
                      !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResYMM<WriteFLoadY, [Zn4FPLd01, Zn4Load],
                      !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResXMM<WriteFMaskedLoad, [Zn4FPLd01, Zn4Load],
                      !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResYMM<WriteFMaskedLoadY, [Zn4FPLd01, Zn4Load],
                      !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;

// FP store.
defm : Zn4WriteResXMM<WriteFStore, [Zn4FPSt, Zn4Store],
                      Znver4Model.StoreLatency, [1, 1], 1>;
8581ac55f4cSDimitry Andric
// Override for the MOVHPS/MOVHPD stores (SSE and VEX encodings):
// two micro-ops with a fixed latency of 2.
def Zn4WriteWriteFStoreMMX : SchedWriteRes<[Zn4FPSt, Zn4Store]> {
  let NumMicroOps = 2;
  let Latency = 2; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1];
}
def : InstRW<[Zn4WriteWriteFStoreMMX],
             (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;

// Regular and non-temporal FP stores: one description for every width.
defm : Zn4WriteResXMM<WriteFStoreX, [Zn4FPSt, Zn4Store],
                      Znver4Model.StoreLatency, [1, 1], 1>;
defm : Zn4WriteResYMM<WriteFStoreY, [Zn4FPSt, Zn4Store],
                      Znver4Model.StoreLatency, [1, 1], 1>;
defm : Zn4WriteResXMM<WriteFStoreNT, [Zn4FPSt, Zn4Store],
                      Znver4Model.StoreLatency, [1, 1], 1>;
defm : Zn4WriteResXMM<WriteFStoreNTX, [Zn4FPSt, Zn4Store],
                      Znver4Model.StoreLatency, [1, 1], 1>;
defm : Zn4WriteResYMM<WriteFStoreNTY, [Zn4FPSt, Zn4Store],
                      Znver4Model.StoreLatency, [1, 1], 1>;

// Masked FP stores: store latency as above, but many more micro-ops and
// a longer hold on Zn4FPSt, varying with element size and vector width.
defm : Zn4WriteResXMM<WriteFMaskedStore32, [Zn4FPSt, Zn4Store],
                      Znver4Model.StoreLatency, [6, 1], 18>;
defm : Zn4WriteResXMM<WriteFMaskedStore64, [Zn4FPSt, Zn4Store],
                      Znver4Model.StoreLatency, [4, 1], 10>;
defm : Zn4WriteResYMM<WriteFMaskedStore32Y, [Zn4FPSt, Zn4Store],
                      Znver4Model.StoreLatency, [12, 1], 42>;
defm : Zn4WriteResYMM<WriteFMaskedStore64Y, [Zn4FPSt, Zn4Store],
                      Znver4Model.StoreLatency, [6, 1], 18>;
8771ac55f4cSDimitry Andric
// Floating point add/sub.
defm : Zn4WriteResXMMPair<WriteFAdd, [Zn4FPFAdd01], 3, [1], 1>;

// x87 add/sub/reverse-sub/mul with a 16- or 32-bit integer memory operand:
// two micro-ops, with the FPU group held for 24 cycles.
def Zn4WriteX87Arith : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
  let NumMicroOps = 2;
  let Latency = !add(Znver4Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1, 24];
}
def : InstRW<[Zn4WriteX87Arith],
             (instrs ADD_FI16m, ADD_FI32m,
                     SUB_FI16m, SUB_FI32m,
                     SUBR_FI16m, SUBR_FI32m,
                     MUL_FI16m, MUL_FI32m)>;

// x87 div/reverse-div with a 16- or 32-bit integer memory operand:
// same shape as above, with the FPU group held for 62 cycles.
def Zn4WriteX87Div : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
  let NumMicroOps = 2;
  let Latency = !add(Znver4Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1, 62];
}
def : InstRW<[Zn4WriteX87Div],
             (instrs DIV_FI16m, DIV_FI32m,
                     DIVR_FI16m, DIVR_FI32m)>;
8971ac55f4cSDimitry Andric
// Floating point add/sub: XMM, YMM, ZMM.
defm : Zn4WriteResXMMPair<WriteFAddX, [Zn4FPFAdd01], 3, [1], 1>;
defm : Zn4WriteResYMMPair<WriteFAddY, [Zn4FPFAdd01], 3, [1], 1>;
defm : Zn4WriteResZMMPair<WriteFAddZ, [Zn4FPFAdd01], 3, [2], 1>;

// Floating point double add/sub: base write, then XMM, YMM, ZMM.
defm : Zn4WriteResXMMPair<WriteFAdd64,  [Zn4FPFAdd01], 3, [1], 1>;
defm : Zn4WriteResXMMPair<WriteFAdd64X, [Zn4FPFAdd01], 3, [1], 1>;
defm : Zn4WriteResYMMPair<WriteFAdd64Y, [Zn4FPFAdd01], 3, [1], 1>;
defm : Zn4WriteResZMMPair<WriteFAdd64Z, [Zn4FPFAdd01], 3, [2], 1>;

// Floating point compare: base write, then XMM, YMM, ZMM.
defm : Zn4WriteResXMMPair<WriteFCmp,  [Zn4FPFMul01], 2, [2], 1>;
defm : Zn4WriteResXMMPair<WriteFCmpX, [Zn4FPFMul01], 2, [1], 1>;
defm : Zn4WriteResYMMPair<WriteFCmpY, [Zn4FPFMul01], 2, [1], 1>;
defm : Zn4WriteResZMMPair<WriteFCmpZ, [Zn4FPFMul01], 2, [2], 1>;

// Floating point double compare: base write, then XMM, YMM, ZMM.
defm : Zn4WriteResXMMPair<WriteFCmp64,  [Zn4FPFMul01], 1, [1], 1>;
defm : Zn4WriteResXMMPair<WriteFCmp64X, [Zn4FPFMul01], 2, [1], 1>;
defm : Zn4WriteResYMMPair<WriteFCmp64Y, [Zn4FPFMul01], 2, [1], 1>;
defm : Zn4WriteResZMMPair<WriteFCmp64Z, [Zn4FPFMul01], 2, [2], 1>;

// Floating point compare to flags: X87 first, then SSE.
// FIXME: latency not from llvm-exegesis (both entries)
defm : Zn4WriteResXMMPair<WriteFCom,  [Zn4FPFMul01], 3, [2], 1>;
defm : Zn4WriteResXMMPair<WriteFComX, [Zn4FPFMul01], 4, [2], 2>;

// Floating point multiplication: base write, then XMM, YMM, ZMM.
defm : Zn4WriteResXMMPair<WriteFMul,  [Zn4FPFMul01], 3, [1], 1>;
defm : Zn4WriteResXMMPair<WriteFMulX, [Zn4FPFMul01], 3, [1], 1>;
defm : Zn4WriteResYMMPair<WriteFMulY, [Zn4FPFMul01], 3, [1], 1>;
defm : Zn4WriteResZMMPair<WriteFMulZ, [Zn4FPFMul01], 3, [2], 1>;

// Floating point double multiplication: base write, then XMM, YMM, ZMM.
defm : Zn4WriteResXMMPair<WriteFMul64,  [Zn4FPFMul01], 3, [1], 1>;
defm : Zn4WriteResXMMPair<WriteFMul64X, [Zn4FPFMul01], 3, [1], 1>;
defm : Zn4WriteResYMMPair<WriteFMul64Y, [Zn4FPFMul01], 3, [1], 1>;
defm : Zn4WriteResZMMPair<WriteFMul64Z, [Zn4FPFMul01], 3, [2], 1>;

// Floating point division: base write, then XMM, YMM, ZMM.
defm : Zn4WriteResXMMPair<WriteFDiv,  [Zn4FPFDiv], 11, [3], 1>;
defm : Zn4WriteResXMMPair<WriteFDivX, [Zn4FPFDiv], 11, [3], 1>;
defm : Zn4WriteResYMMPair<WriteFDivY, [Zn4FPFDiv], 11, [3], 1>;
defm : Zn4WriteResZMMPair<WriteFDivZ, [Zn4FPFDiv], 11, [6], 1>;

// Floating point double division: base write, then XMM, YMM, ZMM.
defm : Zn4WriteResXMMPair<WriteFDiv64,  [Zn4FPFDiv], 13, [5], 1>;
defm : Zn4WriteResXMMPair<WriteFDiv64X, [Zn4FPFDiv], 13, [5], 1>;
defm : Zn4WriteResYMMPair<WriteFDiv64Y, [Zn4FPFDiv], 13, [5], 1>;
defm : Zn4WriteResZMMPair<WriteFDiv64Z, [Zn4FPFDiv], 13, [10], 1>;

// Floating point square root: base write, then XMM, YMM, ZMM.
defm : Zn4WriteResXMMPair<WriteFSqrt,  [Zn4FPFDiv], 15, [5], 1>;
defm : Zn4WriteResXMMPair<WriteFSqrtX, [Zn4FPFDiv], 15, [5], 1>;
defm : Zn4WriteResYMMPair<WriteFSqrtY, [Zn4FPFDiv], 15, [5], 1>;
defm : Zn4WriteResZMMPair<WriteFSqrtZ, [Zn4FPFDiv], 15, [10], 1>;

// Floating point double square root.
defm : Zn4WriteResXMMPair<WriteFSqrt64, [Zn4FPFDiv], 21, [9], 1>;
9351ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFSqrt64, [Zn4FPFDiv], 21, [9], 1>;  // Floating point double square root.
9361ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFSqrt64X, [Zn4FPFDiv], 21, [9], 1>; // Floating point double square root (XMM).
9371ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFSqrt64Y, [Zn4FPFDiv], 21, [9], 1>; // Floating point double square root (YMM).
9381ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFSqrt64Z, [Zn4FPFDiv], 21, [18], 1>; // Floating point double square root (ZMM).
9391ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFSqrt80, [Zn4FPFDiv], 22, [23], 1>; // FIXME: latency not from llvm-exegesis  // Floating point long double square root.
9401ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFRcp, [Zn4FPFMul01], 4, [1], 1>;  // Floating point reciprocal estimate.
9411ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFRcpX, [Zn4FPFMul01], 4, [1], 1>; // Floating point reciprocal estimate (XMM).
9421ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFRcpY, [Zn4FPFMul01], 5, [1], 1>; // Floating point reciprocal estimate (YMM).
9431ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFRcpZ, [Zn4FPFMul01], 5, [2], 1>; // Floating point reciprocal estimate (ZMM).
9441ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFRsqrt, [Zn4FPFDiv], 4, [1], 1>;  // Floating point reciprocal square root estimate.
9451ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFRsqrtX, [Zn4FPFDiv], 4, [1], 1>; // Floating point reciprocal square root estimate (XMM).
9461ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFRsqrtY, [Zn4FPFDiv], 4, [1], 1>; // Floating point reciprocal square root estimate (YMM).
9471ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFRsqrtZ, [Zn4FPFDiv], 5, [2], 1>; // Floating point reciprocal square root estimate (ZMM).
9481ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFMA, [Zn4FPFMul01], 4, [2], 1>;  // Fused Multiply Add.
94906c3fb27SDimitry Andricdefm : Zn4WriteResXMMPair<WriteFMAX, [Zn4FPFMul01], 4, [1], 1>; // Fused Multiply Add (XMM).
95006c3fb27SDimitry Andricdefm : Zn4WriteResYMMPair<WriteFMAY, [Zn4FPFMul01], 4, [1], 1>; // Fused Multiply Add (YMM).
95106c3fb27SDimitry Andricdefm : Zn4WriteResZMMPair<WriteFMAZ, [Zn4FPFMul01], 4, [2], 1>; // Fused Multiply Add (ZMM).
9521ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteDPPD, [Zn4FPFMul01], 7, [6], 3, /*LoadUOps=*/2>; // Floating point double dot product.
9531ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteDPPS, [Zn4FPFMul01], 11, [8], 8, /*LoadUOps=*/2>; // Floating point single dot product.
9541ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteDPPSY, [Zn4FPFMul01], 11, [8], 7, /*LoadUOps=*/1>; // Floating point single dot product (YMM).
9551ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFSign, [Zn4FPFMul01], 1, [2], 1>; // FIXME: latency not from llvm-exegesis  // Floating point fabs/fchs.
9561ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFRnd, [Zn4FPFCvt01], 3, [1], 1>; // Floating point rounding.
9571ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFRndY, [Zn4FPFCvt01], 3, [1], 1>; // Floating point rounding (YMM).
9581ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFRndZ, [Zn4FPFCvt01], 3, [2], 1>; // Floating point rounding (ZMM).
9591ac55f4cSDimitry Andric
// Floating-point logicals, TEST, shuffles and blends. Within each family the
// XMM and YMM entries share values; the ZMM entry doubles the bracketed
// release-at-cycles value.
9601ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFLogic, [Zn4FPVMisc0123], 1, [1], 1>; // Floating point and/or/xor logicals.
9611ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFLogicY, [Zn4FPVMisc0123], 1, [1], 1>; // Floating point and/or/xor logicals (YMM).
9621ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFLogicZ, [Zn4FPVMisc0123], 1, [2], 1>; // Floating point and/or/xor logicals (ZMM).
9631ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFTest, [Zn4FPFMisc12], 1, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions.
9641ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFTestY, [Zn4FPFMisc12], 1, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions (YMM).
// NOTE(review): the ZMM TEST entry passes 1 as its last argument while the
// XMM/YMM entries pass 2 -- confirm.
9651ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFTestZ, [Zn4FPFMisc12], 1, [4], 1>; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions (ZMM).
9661ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFShuffle, [Zn4FPVShuf01], 1, [1], 1>; // Floating point vector shuffles.
9671ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFShuffleY, [Zn4FPVShuf01], 1, [1], 1>; // Floating point vector shuffles (YMM).
9681ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFShuffleZ, [Zn4FPVShuf01], 1, [2], 1>; // Floating point vector shuffles (ZMM).
9691ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFVarShuffle, [Zn4FPVShuf01], 3, [1], 1>; // Floating point vector variable shuffles.
9701ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFVarShuffleY, [Zn4FPVShuf01], 3, [1], 1>; // Floating point vector variable shuffles (YMM).
9711ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFVarShuffleZ, [Zn4FPVShuf01], 3, [2], 1>; // Floating point vector variable shuffles (ZMM).
9721ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFBlend, [Zn4FPFMul01], 1, [1], 1>; // Floating point vector blends.
9731ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFBlendY, [Zn4FPFMul01], 1, [1], 1>; // Floating point vector blends (YMM).
9741ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFBlendZ, [Zn4FPFMul01], 1, [2], 1>; // Floating point vector blends (ZMM).
9751ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFVarBlend, [Zn4FPFMul01], 1, [1], 1>; // Fp vector variable blends.
9761ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFVarBlendY, [Zn4FPFMul01], 1, [1], 1>; // Fp vector variable blends (YMM).
9771ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFVarBlendZ, [Zn4FPFMul01], 1, [2], 1>; // Fp vector variable blends (ZMM).
9781ac55f4cSDimitry Andric
9791ac55f4cSDimitry Andric// Horizontal Add/Sub (float and integer)
// The float variants (WriteFHAdd*) use Zn4FPFAdd0 and the integer variants
// (WritePHAdd*) use Zn4FPVAdd0. Every entry passes 3 as its fifth argument.
9801ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFHAdd, [Zn4FPFAdd0], 4, [2], 3>;
9811ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFHAddY, [Zn4FPFAdd0], 4, [2], 3, /*LoadUOps=*/1>;
9821ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFHAddZ, [Zn4FPFAdd0], 6, [4], 3, /*LoadUOps=*/1>;
9831ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WritePHAdd, [Zn4FPVAdd0], 2, [2], 3, /*LoadUOps=*/1>;
9841ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WritePHAddX, [Zn4FPVAdd0], 2, [2], 3>;
9851ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WritePHAddY, [Zn4FPVAdd0], 3, [3], 3, /*LoadUOps=*/1>;
// NOTE(review): WritePHAddZ passes 2 as its third argument, lower than the 3
// used by WritePHAddY -- confirm this is intended.
9861ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WritePHAddZ, [Zn4FPVAdd0], 2, [4], 3, /*LoadUOps=*/1>;
9871ac55f4cSDimitry Andric
9881ac55f4cSDimitry Andric// Vector integer operations.
// Vector loads (plain, non-temporal and masked) occupy Zn4FPLd01 and Zn4Load
// and are given Znver4Model.VecLoadLatency + 1. Vector stores occupy Zn4FPSt
// and Zn4Store and are given Znver4Model.StoreLatency.
9891ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecLoad, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
9901ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecLoadX, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
9911ac55f4cSDimitry Andricdefm : Zn4WriteResYMM<WriteVecLoadY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
9921ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecLoadNT, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
9931ac55f4cSDimitry Andricdefm : Zn4WriteResYMM<WriteVecLoadNTY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
9941ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecMaskedLoad, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
9951ac55f4cSDimitry Andricdefm : Zn4WriteResYMM<WriteVecMaskedLoadY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
9961ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecStore, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
9971ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecStoreX, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
9981ac55f4cSDimitry Andric
// Register-to-register VEXTRACTF128/VEXTRACTI128: a single micro-op on
// Zn4FPFMisc0 with latency 4. The Latency and NumMicroOps fields of this
// record are reused by Zn4WriteVEXTRACTI128mr and Zn4WriteVINSERTF128rmr.
9991ac55f4cSDimitry Andricdef Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn4FPFMisc0]> {
10001ac55f4cSDimitry Andric  let Latency = 4;
10015f757f3fSDimitry Andric  let ReleaseAtCycles = [1];
10021ac55f4cSDimitry Andric  let NumMicroOps = 1;
10031ac55f4cSDimitry Andric}
10041ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rr, VEXTRACTI128rr)>;
10051ac55f4cSDimitry Andric
// Memory-destination VEXTRACTI128/VEXTRACTF128: the register form's latency
// and micro-op count, plus the model's load latency and one extra micro-op.
// NOTE(review): the latency is built from Znver4Model.LoadLatency although the
// listed resources are the store resources (Zn4FPSt, Zn4Store) -- confirm that
// LoadLatency rather than StoreLatency is intended here.
10061ac55f4cSDimitry Andricdef Zn4WriteVEXTRACTI128mr : SchedWriteRes<[Zn4FPFMisc0, Zn4FPSt, Zn4Store]> {
10071ac55f4cSDimitry Andric  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
10085f757f3fSDimitry Andric  let ReleaseAtCycles = [1, 1, 1];
10091ac55f4cSDimitry Andric  let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
10101ac55f4cSDimitry Andric}
10111ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteVEXTRACTI128mr], (instrs VEXTRACTI128mr, VEXTRACTF128mr)>;
10121ac55f4cSDimitry Andric
// VINSERTF128 with a memory source: occupies an AGU, the load unit and
// Zn4FPFMisc0. Latency is the model's load latency plus the VEXTRACT register
// form's latency; the micro-op count equals the register form's (the !add
// adds 0).
// NOTE(review): only VINSERTF128rm is listed below; whether VINSERTI128rm is
// covered elsewhere is not visible in this chunk -- confirm.
10131ac55f4cSDimitry Andricdef Zn4WriteVINSERTF128rmr : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPFMisc0]> {
10141ac55f4cSDimitry Andric  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
10155f757f3fSDimitry Andric  let ReleaseAtCycles = [1, 1, 1];
10161ac55f4cSDimitry Andric  let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
10171ac55f4cSDimitry Andric}
10181ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteVINSERTF128rmr], (instrs VINSERTF128rm)>;
10191ac55f4cSDimitry Andric
// Remaining vector stores (YMM, non-temporal, masked). All use Zn4FPSt and
// Zn4Store with Znver4Model.StoreLatency. The masked stores pass much larger
// values for Zn4FPSt's release-at-cycles entry and for the last argument than
// the plain stores do.
10201ac55f4cSDimitry Andricdefm : Zn4WriteResYMM<WriteVecStoreY, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
10211ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecStoreNT, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
10221ac55f4cSDimitry Andricdefm : Zn4WriteResYMM<WriteVecStoreNTY, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
10231ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecMaskedStore32, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [6, 1], 18>;
10241ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecMaskedStore64, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [4, 1], 10>;
10251ac55f4cSDimitry Andricdefm : Zn4WriteResYMM<WriteVecMaskedStore32Y, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [12, 1], 42>;
10261ac55f4cSDimitry Andricdefm : Zn4WriteResYMM<WriteVecMaskedStore64Y, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [6, 1], 18>;
10271ac55f4cSDimitry Andric
// Moves between vector registers and GPRs, in both directions, use Zn4FPLd01
// with identical arguments.
10281ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecMoveToGpr, [Zn4FPLd01], 1, [2], 1>;
10291ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecMoveFromGpr, [Zn4FPLd01], 1, [2], 1>;
10301ac55f4cSDimitry Andric
// Override for MMX_MOVQ2FR64rr and MMX_MOVQ2DQrr: two micro-ops, latency 1,
// holding Zn4FPLd01 for 1 cycle and Zn4FPFMisc0123 for 2.
10311ac55f4cSDimitry Andricdef Zn4WriteMOVMMX : SchedWriteRes<[Zn4FPLd01, Zn4FPFMisc0123]> {
10321ac55f4cSDimitry Andric  let Latency = 1;
10335f757f3fSDimitry Andric  let ReleaseAtCycles = [1, 2];
10341ac55f4cSDimitry Andric  let NumMicroOps = 2;
10351ac55f4cSDimitry Andric}
10361ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteMOVMMX], (instrs MMX_MOVQ2FR64rr, MMX_MOVQ2DQrr)>;
10371ac55f4cSDimitry Andric
// Override for MMX_MOVD64rr and MMX_MOVD64to64rr. It matches Zn4WriteMOVMMX
// except that Zn4FPFMisc0123 is held for 4 cycles instead of 2.
10381ac55f4cSDimitry Andricdef Zn4WriteMOVMMXSlow : SchedWriteRes<[Zn4FPLd01, Zn4FPFMisc0123]> {
10391ac55f4cSDimitry Andric  let Latency = 1;
10405f757f3fSDimitry Andric  let ReleaseAtCycles = [1, 4];
10411ac55f4cSDimitry Andric  let NumMicroOps = 2;
10421ac55f4cSDimitry Andric}
10431ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteMOVMMXSlow], (instrs MMX_MOVD64rr, MMX_MOVD64to64rr)>;
10441ac55f4cSDimitry Andric
// Default vector integer ALU class, bound to the Zn4FPVAdd0123 resource group.
10451ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVecALU, [Zn4FPVAdd0123], 1, [1], 1>;  // Vector integer ALU op, no logicals.
10461ac55f4cSDimitry Andric
// Override for EXTRQ and INSERTQ: one micro-op, latency 3, using Zn4FPVShuf01
// and Zn4FPLd01 for one cycle each.
10471ac55f4cSDimitry Andricdef Zn4WriteEXTRQ_INSERTQ : SchedWriteRes<[Zn4FPVShuf01, Zn4FPLd01]> {
10481ac55f4cSDimitry Andric  let Latency = 3;
10495f757f3fSDimitry Andric  let ReleaseAtCycles = [1, 1];
10501ac55f4cSDimitry Andric  let NumMicroOps = 1;
10511ac55f4cSDimitry Andric}
10521ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteEXTRQ_INSERTQ], (instrs EXTRQ, INSERTQ)>;
10531ac55f4cSDimitry Andric
// Override for EXTRQI and INSERTQI. Resources and latency match
// Zn4WriteEXTRQ_INSERTQ, but this record has two micro-ops instead of one.
10541ac55f4cSDimitry Andricdef Zn4WriteEXTRQI_INSERTQI : SchedWriteRes<[Zn4FPVShuf01, Zn4FPLd01]> {
10551ac55f4cSDimitry Andric  let Latency = 3;
10565f757f3fSDimitry Andric  let ReleaseAtCycles = [1, 1];
10571ac55f4cSDimitry Andric  let NumMicroOps = 2;
10581ac55f4cSDimitry Andric}
10591ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteEXTRQI_INSERTQI], (instrs EXTRQI, INSERTQI)>;
10601ac55f4cSDimitry Andric
// XMM vector integer ALU class, with the same arguments as the default
// WriteVecALU entry.
10611ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVecALUX, [Zn4FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (XMM).
10621ac55f4cSDimitry Andric
// Override for the register forms of the XMM abs / saturating add and sub /
// average / sign instructions and VPCMPEQQrr. These use the narrower
// Zn4FPVAdd01 group rather than Zn4FPVAdd0123.
// NOTE(review): this record uses Latency 2 and ReleaseAtCycles [2], while the
// YMM counterpart Zn4WriteVecALUYSlow uses 1 and [1] -- confirm the difference
// is intended.
10631ac55f4cSDimitry Andricdef Zn4WriteVecALUXSlow : SchedWriteRes<[Zn4FPVAdd01]> {
10641ac55f4cSDimitry Andric  let Latency = 2;
10655f757f3fSDimitry Andric  let ReleaseAtCycles = [2];
10661ac55f4cSDimitry Andric  let NumMicroOps = 1;
10671ac55f4cSDimitry Andric}
10681ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteVecALUXSlow], (instrs PABSBrr, PABSDrr, PABSWrr,
10691ac55f4cSDimitry Andric                                            PADDSBrr, PADDSWrr, PADDUSBrr, PADDUSWrr,
10701ac55f4cSDimitry Andric                                            PAVGBrr, PAVGWrr,
10711ac55f4cSDimitry Andric                                            PSIGNBrr, PSIGNDrr, PSIGNWrr,
10721ac55f4cSDimitry Andric                                            VPABSBrr, VPABSDrr, VPABSWrr,
10731ac55f4cSDimitry Andric                                            VPADDSBrr, VPADDSWrr, VPADDUSBrr, VPADDUSWrr,
10741ac55f4cSDimitry Andric                                            VPAVGBrr, VPAVGWrr,
10751ac55f4cSDimitry Andric                                            VPCMPEQQrr,
10761ac55f4cSDimitry Andric                                            VPSIGNBrr, VPSIGNDrr, VPSIGNWrr,
10771ac55f4cSDimitry Andric                                            PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr, VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr)>;
10781ac55f4cSDimitry Andric
// Override for the opmask (K-register) instructions listed below: add, and,
// andn, k-to-k and k-to-GPR moves, not, or, ortest, test, unpack, xnor, xor.
// One micro-op, latency 1, on Zn4FPOpMask01.
10791ac55f4cSDimitry Andricdef Zn4WriteVecOpMask : SchedWriteRes<[Zn4FPOpMask01]> {
10801ac55f4cSDimitry Andric  let Latency = 1;
10815f757f3fSDimitry Andric  let ReleaseAtCycles = [1];
10821ac55f4cSDimitry Andric  let NumMicroOps = 1;
10831ac55f4cSDimitry Andric}
10841ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteVecOpMask], (instrs   KADDBrr, KADDDrr, KADDQrr, KADDWrr,
10851ac55f4cSDimitry Andric                                            KANDBrr, KANDDrr, KANDQrr, KANDWrr,
10861ac55f4cSDimitry Andric                                            KANDNBrr, KANDNDrr, KANDNQrr, KANDNWrr,
10871ac55f4cSDimitry Andric                                            KMOVBkk, KMOVDkk, KMOVQkk, KMOVWkk,
10881ac55f4cSDimitry Andric                                            KMOVBrk, KMOVDrk, KMOVQrk, KMOVWrk,
10891ac55f4cSDimitry Andric                                            KNOTBrr, KNOTDrr, KNOTQrr, KNOTWrr,
10901ac55f4cSDimitry Andric                                            KORBrr, KORDrr, KORQrr, KORWrr,
10911ac55f4cSDimitry Andric                                            KORTESTBrr, KORTESTDrr, KORTESTQrr, KORTESTWrr,
10921ac55f4cSDimitry Andric                                            KTESTBrr, KTESTDrr, KTESTQrr, KTESTWrr,
10931ac55f4cSDimitry Andric                                            KUNPCKBWrr, KUNPCKDQrr, KUNPCKWDrr,
10941ac55f4cSDimitry Andric                                            KXNORBrr, KXNORDrr, KXNORQrr, KXNORWrr,
10951ac55f4cSDimitry Andric                                            KXORBrr, KXORDrr, KXORQrr, KXORWrr)>;
10961ac55f4cSDimitry Andric
// Override for the KMOV{B,D,Q,W}mk instructions: one micro-op, latency 1, on
// Zn4FPOpMask4.
// NOTE(review): only Zn4FPOpMask4 is listed; no load/store resource appears on
// this record although its name says "Mem" -- confirm that is intended.
10971ac55f4cSDimitry Andricdef Zn4WriteVecOpMaskMemMov : SchedWriteRes<[Zn4FPOpMask4]> {
10981ac55f4cSDimitry Andric  let Latency = 1;
10995f757f3fSDimitry Andric  let ReleaseAtCycles = [1];
11001ac55f4cSDimitry Andric  let NumMicroOps = 1;
11011ac55f4cSDimitry Andric}
11021ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteVecOpMaskMemMov], (instrs KMOVBmk, KMOVDmk, KMOVQmk, KMOVWmk)>;
11031ac55f4cSDimitry Andric
// Override for the KMOV{B,D,Q,W}kr instructions: one micro-op, latency 1, on
// Zn4FPOpMask4. The field values are the same as Zn4WriteVecOpMaskMemMov.
11041ac55f4cSDimitry Andricdef Zn4WriteVecOpMaskKRMov : SchedWriteRes<[Zn4FPOpMask4]> {
11051ac55f4cSDimitry Andric  let Latency = 1;
11065f757f3fSDimitry Andric  let ReleaseAtCycles = [1];
11071ac55f4cSDimitry Andric  let NumMicroOps = 1;
11081ac55f4cSDimitry Andric}
11091ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteVecOpMaskKRMov], (instrs KMOVBkr, KMOVDkr, KMOVQkr, KMOVWkr)>;
11101ac55f4cSDimitry Andric
// Override for the register-immediate VALIGND/VALIGNQ forms at 128, 256 and
// 512 bits: one micro-op on Zn4FPVAdd12 with latency 4.
11111ac55f4cSDimitry Andricdef Zn4WriteVecALU2Slow : SchedWriteRes<[Zn4FPVAdd12]> {
11121ac55f4cSDimitry Andric  // TODO: All align instructions are expected to be of 4 cycle latency
11131ac55f4cSDimitry Andric  let Latency = 4;
11145f757f3fSDimitry Andric  let ReleaseAtCycles = [1];
11151ac55f4cSDimitry Andric  let NumMicroOps = 1;
11161ac55f4cSDimitry Andric}
11171ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteVecALU2Slow], (instrs VALIGNDZrri, VALIGNDZ128rri, VALIGNDZ256rri,
11181ac55f4cSDimitry Andric                                            VALIGNQZrri, VALIGNQZ128rri, VALIGNQZ256rri)
11191ac55f4cSDimitry Andric                                            >;
// YMM vector integer ALU class, with the same arguments as the XMM entry.
11201ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteVecALUY, [Zn4FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (YMM).
11211ac55f4cSDimitry Andric
// Override for the register forms of the YMM abs / saturating add and sub /
// average / sign instructions and VPCMPEQQYrr. These use the narrower
// Zn4FPVAdd01 group; one micro-op, latency 1.
11221ac55f4cSDimitry Andricdef Zn4WriteVecALUYSlow : SchedWriteRes<[Zn4FPVAdd01]> {
11231ac55f4cSDimitry Andric  let Latency = 1;
11245f757f3fSDimitry Andric  let ReleaseAtCycles = [1];
11251ac55f4cSDimitry Andric  let NumMicroOps = 1;
11261ac55f4cSDimitry Andric}
11271ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteVecALUYSlow], (instrs VPABSBYrr, VPABSDYrr, VPABSWYrr,
11281ac55f4cSDimitry Andric                                            VPADDSBYrr, VPADDSWYrr, VPADDUSBYrr, VPADDUSWYrr,
11291ac55f4cSDimitry Andric                                            VPSUBSBYrr, VPSUBSWYrr, VPSUBUSBYrr, VPSUBUSWYrr,
11301ac55f4cSDimitry Andric                                            VPAVGBYrr, VPAVGWYrr,
11311ac55f4cSDimitry Andric                                            VPCMPEQQYrr,
11321ac55f4cSDimitry Andric                                            VPSIGNBYrr, VPSIGNDYrr, VPSIGNWYrr)>;
11331ac55f4cSDimitry Andric
// ZMM vector integer ALU class: the same resource group as XMM/YMM, with the
// bracketed release-at-cycles value doubled to 2.
11341ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteVecALUZ, [Zn4FPVAdd0123], 1, [2], 1>; // Vector integer ALU op, no logicals (ZMM).
11351ac55f4cSDimitry Andric
// Vector integer logicals, TEST, shifts, multiplies, shuffles, blends and
// sum-of-absolute-differences classes. In most families the ZMM entry doubles
// the bracketed release-at-cycles value of the XMM/YMM entries.
11361ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVecLogic, [Zn4FPVMisc0123], 1, [1], 1>;  // Vector integer and/or/xor logicals.
11371ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVecLogicX, [Zn4FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals (XMM).
11381ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteVecLogicY, [Zn4FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals (YMM).
11391ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteVecLogicZ, [Zn4FPVMisc0123], 1, [2], 1>; // Vector integer and/or/xor logicals (ZMM).
11401ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVecTest, [Zn4FPVAdd12, Zn4FPSt], 1, [1, 1], 2>;  // FIXME: latency not from llvm-exegesis // Vector integer TEST instructions.
11411ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteVecTestY, [Zn4FPVAdd12, Zn4FPSt], 1, [1, 1], 2>; // FIXME: latency not from llvm-exegesis  // Vector integer TEST instructions (YMM).
11421ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteVecTestZ, [Zn4FPVAdd12, Zn4FPSt], 1, [2, 2], 2>; // FIXME: latency not from llvm-exegesis  // Vector integer TEST instructions (ZMM).
11431ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVecShift, [Zn4FPVShift01], 1, [1], 1>;  // Vector integer shifts (default).
// NOTE(review): WriteVecShiftX passes 2 and [2] where the default and YMM
// shift entries pass 1 and [1] -- confirm this is intended.
11441ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVecShiftX, [Zn4FPVShift01], 2, [2], 1>; // Vector integer shifts (XMM).
11451ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteVecShiftY, [Zn4FPVShift01], 1, [1], 1>; // Vector integer shifts (YMM).
11461ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteVecShiftZ, [Zn4FPVShift01], 1, [2], 1>; // Vector integer shifts (ZMM).
11471ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVecShiftImm, [Zn4FPVShift01], 1, [1], 1>;  // Vector integer immediate shifts (default).
11481ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVecShiftImmX, [Zn4FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (XMM).
11491ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteVecShiftImmY, [Zn4FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (YMM).
11501ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteVecShiftImmZ, [Zn4FPVShift01], 1, [2], 1>; // Vector integer immediate shifts (ZMM).
11511ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVecIMul, [Zn4FPVMul01], 3, [1], 1>;  // Vector integer multiply (default).
11521ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVecIMulX, [Zn4FPVMul01], 3, [1], 1>; // Vector integer multiply (XMM).
11531ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteVecIMulY, [Zn4FPVMul01], 3, [1], 1>; // Vector integer multiply (YMM).
11541ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteVecIMulZ, [Zn4FPVMul01], 3, [2], 1>; // Vector integer multiply (ZMM).
11551ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WritePMULLD, [Zn4FPVMul01], 3, [1], 1>; // Vector PMULLD.
11561ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WritePMULLDY, [Zn4FPVMul01], 3, [1], 1>; // Vector PMULLD (YMM).
11571ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WritePMULLDZ, [Zn4FPVMul01], 3, [2], 1>; // Vector PMULLD (ZMM).
11581ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteShuffle, [Zn4FPVShuf01], 1, [1], 1>;  // Vector shuffles.
11591ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteShuffleX, [Zn4FPVShuf01], 1, [1], 1>; // Vector shuffles (XMM).
11601ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteShuffleY, [Zn4FPVShuf01], 1, [1], 1>; // Vector shuffles (YMM).
11611ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteShuffleZ, [Zn4FPVShuf01], 1, [2], 1>; // Vector shuffles (ZMM).
11621ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVarShuffle, [Zn4FPVShuf01], 1, [1], 1>;  // Vector variable shuffles.
11631ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVarShuffleX, [Zn4FPVShuf01], 1, [1], 1>; // Vector variable shuffles (XMM).
11641ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteVarShuffleY, [Zn4FPVShuf01], 1, [1], 1>; // Vector variable shuffles (YMM).
11651ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteVarShuffleZ, [Zn4FPVShuf01], 1, [2], 1>; // Vector variable shuffles (ZMM).
11661ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteBlend, [Zn4FPVMisc0123], 1, [1], 1>; // Vector blends.
11671ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteBlendY, [Zn4FPVMisc0123], 1, [1], 1>; // Vector blends (YMM).
11681ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteBlendZ, [Zn4FPVMisc0123], 1, [2], 1>; // Vector blends (ZMM).
11691ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVarBlend, [Zn4FPVMul01], 1, [1], 1>; // Vector variable blends.
11701ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteVarBlendY, [Zn4FPVMul01], 1, [1], 1>; // Vector variable blends (YMM).
11711ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteVarBlendZ, [Zn4FPVMul01], 1, [2], 1>; // Vector variable blends (ZMM).
11721ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WritePSADBW, [Zn4FPVAdd0123], 3, [2], 1>;  // Vector PSADBW.
11731ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WritePSADBWX, [Zn4FPVAdd0123], 3, [2], 1>; // Vector PSADBW (XMM).
11741ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WritePSADBWY, [Zn4FPVAdd0123], 3, [2], 1>; // Vector PSADBW (YMM).
11751ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WritePSADBWZ, [Zn4FPVAdd0123], 4, [4], 1>; // Vector PSADBW (ZMM).
11761ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteMPSAD, [Zn4FPVAdd0123], 4, [8], 4, /*LoadUOps=*/2>; // Vector MPSAD.
11771ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteMPSADY, [Zn4FPVAdd0123], 4, [8], 3, /*LoadUOps=*/1>; // Vector MPSAD (YMM).
11781ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteMPSADZ, [Zn4FPVAdd0123], 4, [16], 3, /*LoadUOps=*/1>; // Vector MPSAD (ZMM).
11791ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WritePHMINPOS, [Zn4FPVAdd01], 3, [1], 1>;  // Vector PHMINPOS.
11801ac55f4cSDimitry Andric
11811ac55f4cSDimitry Andric// Vector insert/extract operations.
// Insert and extract-to-GPR use Zn4FPLd01; extract-and-store uses the store
// resources and is given 1 + Znver4Model.StoreLatency.
// NOTE(review): WriteVecInsert passes LoadUOps=-1; how the Pair multiclass
// applies a negative value is defined outside this chunk -- confirm there.
11821ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteVecInsert, [Zn4FPLd01], 1, [2], 2, /*LoadUOps=*/-1>; // Insert gpr to vector element.
11831ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecExtract, [Zn4FPLd01], 1, [2], 2>; // Extract vector element to gpr.
11841ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecExtractSt, [Zn4FPSt, Zn4Store], !add(1, Znver4Model.StoreLatency), [1, 1], 2>; // Extract vector element and store.
11851ac55f4cSDimitry Andric
11861ac55f4cSDimitry Andric// MOVMSK operations.
// All four MOVMSK classes (FP, vector XMM, vector YMM, MMX) are bound to
// Zn4FPVMisc2 with identical arguments.
11871ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteFMOVMSK, [Zn4FPVMisc2], 1, [1], 1>;
11881ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteVecMOVMSK, [Zn4FPVMisc2], 1, [1], 1>;
11891ac55f4cSDimitry Andricdefm : Zn4WriteResYMM<WriteVecMOVMSKY, [Zn4FPVMisc2], 1, [1], 1>;
11901ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteMMXMOVMSK, [Zn4FPVMisc2], 1, [1], 1>;
11911ac55f4cSDimitry Andric
11921ac55f4cSDimitry Andric// Conversion between integer and float.
// Double -> integer conversions, all on Zn4FPFCvt01.
// NOTE(review): the scalar WriteCvtSD2I entry passes 1 as its third argument
// while the packed entries pass 3 -- confirm.
11931ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtSD2I, [Zn4FPFCvt01], 1, [1], 1>;  // Double -> Integer.
11941ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtPD2I, [Zn4FPFCvt01], 3, [2], 1>; // Double -> Integer (XMM).
11951ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteCvtPD2IY, [Zn4FPFCvt01], 3, [2], 2>; // Double -> Integer (YMM).
11961ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteCvtPD2IZ, [Zn4FPFCvt01], 3, [4], 2>; // Double -> Integer (ZMM).
11971ac55f4cSDimitry Andric
// Two-micro-op record on Zn4FPFCvt01 with latency 1, holding the resource for
// 2 cycles. Presumably for the MMX double -> integer conversions (going by the
// name); no InstRW referencing it is visible in this chunk -- confirm where it
// is used.
11981ac55f4cSDimitry Andricdef Zn4WriteCvtPD2IMMX : SchedWriteRes<[Zn4FPFCvt01]> {
11991ac55f4cSDimitry Andric  let Latency = 1;
12005f757f3fSDimitry Andric  let ReleaseAtCycles = [2];
12011ac55f4cSDimitry Andric  let NumMicroOps = 2;
12021ac55f4cSDimitry Andric}
// Scalar float -> integer conversion on Zn4FPFCvt01.
12031ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtSS2I, [Zn4FPFCvt01], 5, [5], 2>;  // Float -> Integer.
12041ac55f4cSDimitry Andric
// Packed float -> integer conversions on Zn4FPFCvt01.
12051ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtPS2I, [Zn4FPFCvt01], 3, [1], 1>; // Float -> Integer (XMM).
12061ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteCvtPS2IY, [Zn4FPFCvt01], 4, [1], 1>; // Float -> Integer (YMM).
12071ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteCvtPS2IZ, [Zn4FPFCvt01], 4, [2], 2>; // Float -> Integer (ZMM).
12081ac55f4cSDimitry Andric
// Integer -> double conversions on Zn4FPFCvt01. The scalar, YMM and ZMM
// entries pass LoadUOps=-1; the XMM entry omits the argument.
12091ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtI2SD, [Zn4FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>;  // Integer -> Double.
12101ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtI2PD, [Zn4FPFCvt01], 3, [1], 1>; // Integer -> Double (XMM).
12111ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteCvtI2PDY, [Zn4FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>; // Integer -> Double (YMM).
12121ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteCvtI2PDZ, [Zn4FPFCvt01], 4, [4], 4, /*LoadUOps=*/-1>; // Integer -> Double (ZMM).
12131ac55f4cSDimitry Andric
// Two-micro-op record on Zn4FPFCvt01 with latency 2, holding the resource for
// 6 cycles. Presumably for the MMX integer -> double conversions (going by the
// name); no InstRW referencing it is visible in this chunk -- confirm where it
// is used.
12141ac55f4cSDimitry Andricdef Zn4WriteCvtI2PDMMX : SchedWriteRes<[Zn4FPFCvt01]> {
12151ac55f4cSDimitry Andric  let Latency = 2;
12165f757f3fSDimitry Andric  let ReleaseAtCycles = [6];
12171ac55f4cSDimitry Andric  let NumMicroOps = 2;
12181ac55f4cSDimitry Andric}
12191ac55f4cSDimitry Andric
// Integer -> float conversions on Zn4FPFCvt01. Only the scalar entry passes
// LoadUOps=-1.
12201ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtI2SS, [Zn4FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>;  // Integer -> Float.
12211ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtI2PS, [Zn4FPFCvt01], 3, [1], 1>; // Integer -> Float (XMM).
12221ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteCvtI2PSY, [Zn4FPFCvt01], 3, [1], 1>; // Integer -> Float (YMM).
12231ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteCvtI2PSZ, [Zn4FPFCvt01], 3, [2], 2>; // Integer -> Float (ZMM).
12241ac55f4cSDimitry Andric
// Two-micro-op record on Zn4FPFCvt01 with latency 3, holding the resource for
// 1 cycle. Presumably for the MMX integer -> float conversions (going by the
// name); no InstRW referencing it is visible in this chunk -- confirm where it
// is used.
12251ac55f4cSDimitry Andricdef Zn4WriteCvtI2PSMMX : SchedWriteRes<[Zn4FPFCvt01]> {
12261ac55f4cSDimitry Andric  let Latency = 3;
12275f757f3fSDimitry Andric  let ReleaseAtCycles = [1];
12281ac55f4cSDimitry Andric  let NumMicroOps = 2;
12291ac55f4cSDimitry Andric}
12301ac55f4cSDimitry Andric
// Float -> double widening conversions on Zn4FPFCvt01. The YMM and ZMM
// entries pass LoadUOps=-1.
12311ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtSS2SD, [Zn4FPFCvt01], 3, [1], 1>;  // Float -> Double size conversion.
12321ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtPS2PD, [Zn4FPFCvt01], 3, [1], 1>; // Float -> Double size conversion (XMM).
12331ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteCvtPS2PDY, [Zn4FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Float -> Double size conversion (YMM).
12341ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteCvtPS2PDZ, [Zn4FPFCvt01], 6, [4], 4, /*LoadUOps=*/-1>; // Float -> Double size conversion (ZMM).
12351ac55f4cSDimitry Andric
// Double -> float narrowing conversions on Zn4FPFCvt01. Unlike the widening
// group, none of these pass an explicit LoadUOps.
12361ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtSD2SS, [Zn4FPFCvt01], 3, [1], 1>;  // Double -> Float size conversion.
12371ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtPD2PS, [Zn4FPFCvt01], 3, [1], 1>; // Double -> Float size conversion (XMM).
12381ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteCvtPD2PSY, [Zn4FPFCvt01], 6, [2], 2>; // Double -> Float size conversion (YMM).
12391ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteCvtPD2PSZ, [Zn4FPFCvt01], 6, [4], 4>; // Double -> Float size conversion (ZMM).
12401ac55f4cSDimitry Andric
// Half -> float widening conversions on Zn4FPFCvt01. The YMM and ZMM entries
// pass LoadUOps=-1.
12411ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteCvtPH2PS, [Zn4FPFCvt01], 3, [1], 1>; // Half -> Float size conversion.
12421ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteCvtPH2PSY, [Zn4FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Half -> Float size conversion (YMM).
12431ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteCvtPH2PSZ, [Zn4FPFCvt01], 4, [4], 4, /*LoadUOps=*/-1>; // Half -> Float size conversion (ZMM).
12441ac55f4cSDimitry Andric
// Float -> half narrowing conversions, register destination only. These use
// the non-Pair multiclasses; the store forms are declared separately.
// NOTE(review): the ZMM entry passes exactly the same values as the YMM entry,
// whereas most ZMM entries in this file double the release-at-cycles value --
// confirm.
12451ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteCvtPS2PH, [Zn4FPFCvt01], 3, [2], 1>; // Float -> Half size conversion.
12461ac55f4cSDimitry Andricdefm : Zn4WriteResYMM<WriteCvtPS2PHY, [Zn4FPFCvt01], 6, [2], 2>; // Float -> Half size conversion (YMM).
12471ac55f4cSDimitry Andricdefm : Zn4WriteResZMM<WriteCvtPS2PHZ, [Zn4FPFCvt01], 6, [2], 2>; // Float -> Half size conversion (ZMM).
12481ac55f4cSDimitry Andric
12491ac55f4cSDimitry Andricdefm : Zn4WriteResXMM<WriteCvtPS2PHSt, [Zn4FPFCvt01, Zn4FPSt, Zn4Store], !add(3, Znver4Model.StoreLatency), [1, 1, 1], 2>; // Float -> Half + store size conversion.
12501ac55f4cSDimitry Andricdefm : Zn4WriteResYMM<WriteCvtPS2PHYSt, [Zn4FPFCvt01, Zn4FPSt, Zn4Store], !add(6, Znver4Model.StoreLatency), [2, 1, 1], 3>; // Float -> Half + store size conversion (YMM).
12511ac55f4cSDimitry Andricdefm : Zn4WriteResYMM<WriteCvtPS2PHZSt, [Zn4FPFCvt01, Zn4FPSt, Zn4Store], !add(6, Znver4Model.StoreLatency), [2, 1, 1], 3>; // Float -> Half + store size conversion (ZMM).
12521ac55f4cSDimitry Andric
// CRC32 instruction: bound to Zn4ALU1.
defm : Zn4WriteResIntPair<WriteCRC32, [Zn4ALU1],
                          3, [1], 1>;
12551ac55f4cSDimitry Andric
// SHA1MSG1, register form: 2 micro-ops on Zn4FPU0123.
def Zn4WriteSHA1MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
  let NumMicroOps = 2;
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4WriteSHA1MSG1rr],
             (instrs SHA1MSG1rr)>;

// SHA1MSG1, memory form: the register form plus Zn4AGU012 and Zn4Load.
// Latency is the model's load latency added to the register-form latency;
// the micro-op count is the register form's (+0).
def Zn4WriteSHA1MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
  let NumMicroOps = !add(Zn4WriteSHA1MSG1rr.NumMicroOps, 0);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG1rr.Latency);
  // One entry per resource, in the order listed above.
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[Zn4WriteSHA1MSG1rm],
             (instrs SHA1MSG1rm)>;
12691ac55f4cSDimitry Andric
// SHA1MSG2 / SHA1NEXTE, register forms: a single micro-op on Zn4FPU0123.
def Zn4WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn4FPU0123]> {
  let NumMicroOps = 1;
  let Latency = 1;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4WriteSHA1MSG2rr_SHA1NEXTErr],
             (instrs SHA1MSG2rr, SHA1NEXTErr)>;

// SHA1MSG2 / SHA1NEXTE, memory forms: the register form plus Zn4AGU012 and
// Zn4Load, with the model's load latency added to the register-form latency.
def Zn4Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
  let NumMicroOps = !add(Zn4WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
  // One entry per resource, in the order listed above.
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[Zn4Writerm_SHA1MSG2rm_SHA1NEXTErm],
             (instrs SHA1MSG2rm, SHA1NEXTErm)>;
12831ac55f4cSDimitry Andric
// SHA256MSG1, register form: 2 micro-ops on Zn4FPU0123.
def Zn4WriteSHA256MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
  let NumMicroOps = 2;
  let Latency = 2;
  let ReleaseAtCycles = [3];
}
def : InstRW<[Zn4WriteSHA256MSG1rr],
             (instrs SHA256MSG1rr)>;

// SHA256MSG1, memory form: the register form plus Zn4AGU012 and Zn4Load,
// with the model's load latency added to the register-form latency.
def Zn4Writerm_SHA256MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
  let NumMicroOps = !add(Zn4WriteSHA256MSG1rr.NumMicroOps, 0);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG1rr.Latency);
  // One entry per resource, in the order listed above.
  let ReleaseAtCycles = [1, 1, 3];
}
def : InstRW<[Zn4Writerm_SHA256MSG1rm],
             (instrs SHA256MSG1rm)>;
12971ac55f4cSDimitry Andric
// SHA256MSG2, register form: 4 micro-ops on Zn4FPU0123.
def Zn4WriteSHA256MSG2rr : SchedWriteRes<[Zn4FPU0123]> {
  let NumMicroOps = 4;
  let Latency = 3;
  let ReleaseAtCycles = [8];
}
def : InstRW<[Zn4WriteSHA256MSG2rr],
             (instrs SHA256MSG2rr)>;

// SHA256MSG2, memory form: the register form plus Zn4AGU012 and Zn4Load.
// The load adds the model's load latency and one extra micro-op.
def Zn4WriteSHA256MSG2rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
  let NumMicroOps = !add(Zn4WriteSHA256MSG2rr.NumMicroOps, 1);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG2rr.Latency);
  // One entry per resource, in the order listed above.
  let ReleaseAtCycles = [1, 1, 8];
}
def : InstRW<[Zn4WriteSHA256MSG2rm],
             (instrs SHA256MSG2rm)>;
13111ac55f4cSDimitry Andric
// SHA1RNDS4 (register + immediate form): a single micro-op on Zn4FPU0123.
def Zn4WriteSHA1RNDS4rri : SchedWriteRes<[Zn4FPU0123]> {
  let NumMicroOps = 1;
  let Latency = 6;
  let ReleaseAtCycles = [8];
}
def : InstRW<[Zn4WriteSHA1RNDS4rri],
             (instrs SHA1RNDS4rri)>;

// SHA256RNDS2 (register form): a single micro-op on Zn4FPU0123.
def Zn4WriteSHA256RNDS2rr : SchedWriteRes<[Zn4FPU0123]> {
  let NumMicroOps = 1;
  let Latency = 4;
  let ReleaseAtCycles = [8];
}
def : InstRW<[Zn4WriteSHA256RNDS2rr],
             (instrs SHA256RNDS2rr)>;
13251ac55f4cSDimitry Andric
// String instructions. All four writes are bound to Zn4FPVAdd0123.
// Packed Compare Implicit Length Strings, Return Mask
defm : Zn4WriteResXMMPair<WritePCmpIStrM, [Zn4FPVAdd0123],
                          6, [8], 3, /*LoadUOps=*/1>;
// Packed Compare Explicit Length Strings, Return Mask
defm : Zn4WriteResXMMPair<WritePCmpEStrM, [Zn4FPVAdd0123],
                          6, [12], 7, /*LoadUOps=*/5>;
// Packed Compare Implicit Length Strings, Return Index
defm : Zn4WriteResXMMPair<WritePCmpIStrI, [Zn4FPVAdd0123],
                          2, [8], 4>;
// Packed Compare Explicit Length Strings, Return Index
defm : Zn4WriteResXMMPair<WritePCmpEStrI, [Zn4FPVAdd0123],
                          6, [12], 8, /*LoadUOps=*/4>;
13351ac55f4cSDimitry Andric
// AES instructions, all bound to Zn4FPAES01 with identical parameters.
// Decryption, encryption.
defm : Zn4WriteResXMMPair<WriteAESDecEnc, [Zn4FPAES01],
                          4, [1], 1>;
// InvMixColumn.
defm : Zn4WriteResXMMPair<WriteAESIMC, [Zn4FPAES01],
                          4, [1], 1>;
// Key Generation.
defm : Zn4WriteResXMMPair<WriteAESKeyGen, [Zn4FPAES01],
                          4, [1], 1>;

// Carry-less multiplication instructions, bound to Zn4FPCLM01.
defm : Zn4WriteResXMMPair<WriteCLMul, [Zn4FPCLM01],
                          4, [4], 4>;
13431ac55f4cSDimitry Andric
// EMMS/FEMMS
// FIXME: latency not from llvm-exegesis
defm : Zn4WriteResInt<WriteEMMS, [Zn4ALU0123],
                      2, [1], 1>;

// Load/store MXCSR
// The load form adds Znver4Model.LoadLatency to a base of 1.
// FIXME: latency not from llvm-exegesis
defm : Zn4WriteResInt<WriteLDMXCSR,
                      [Zn4AGU012, Zn4Load, Zn4ALU0123],
                      !add(Znver4Model.LoadLatency, 1),
                      [1, 1, 6], 1>;
// The store form adds Znver4Model.StoreLatency to a base of 1.
// FIXME: latency not from llvm-exegesis
defm : Zn4WriteResInt<WriteSTMXCSR,
                      [Zn4ALU0123, Zn4AGU012, Zn4Store],
                      !add(1, Znver4Model.StoreLatency),
                      [60, 1, 1], 2>;

// Catch-all for expensive system instructions.
defm : Zn4WriteResInt<WriteSystem, [Zn4ALU0123],
                      100, [100], 100>;
13531ac55f4cSDimitry Andric
// VZEROUPPER: a single micro-op on Zn4FPU0123, modeled with zero latency.
def Zn4WriteVZEROUPPER : SchedWriteRes<[Zn4FPU0123]> {
  let NumMicroOps = 1;
  let Latency = 0; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1];
}
def : InstRW<[Zn4WriteVZEROUPPER],
             (instrs VZEROUPPER)>;

// VZEROALL: 18 micro-ops on Zn4FPU0123.
def Zn4WriteVZEROALL : SchedWriteRes<[Zn4FPU0123]> {
  let NumMicroOps = 18;
  let Latency = 10; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [24];
}
def : InstRW<[Zn4WriteVZEROALL],
             (instrs VZEROALL)>;
13671ac55f4cSDimitry Andric
// AVX2. The three shuffle writes below are bound to Zn4FPVShuf.
// Fp 256-bit width vector shuffles.
defm : Zn4WriteResYMMPair<WriteFShuffle256, [Zn4FPVShuf],
                          2, [1], 1, /*LoadUOps=*/2>;
// Fp 256-bit width variable shuffles.
defm : Zn4WriteResYMMPair<WriteFVarShuffle256, [Zn4FPVShuf],
                          7, [1], 2, /*LoadUOps=*/1>;
// 256-bit width vector shuffles.
defm : Zn4WriteResYMMPair<WriteShuffle256, [Zn4FPVShuf],
                          1, [1], 1>;
13721ac55f4cSDimitry Andric
// VPERM2I128 / VPERM2F128, register forms: a single micro-op on Zn4FPVShuf.
def Zn4WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn4FPVShuf]> {
  let NumMicroOps = 1;
  let Latency = 3;
  let ReleaseAtCycles = [1];
}
def : InstRW<[Zn4WriteVPERM2I128rr_VPERM2F128rr],
             (instrs VPERM2I128rr, VPERM2F128rr)>;

// VPERM2F128, memory form: the register form plus Zn4AGU012 and Zn4Load,
// with the model's load latency added to the register-form latency.
def Zn4WriteVPERM2F128rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
  let NumMicroOps = !add(Zn4WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERM2I128rr_VPERM2F128rr.Latency);
  // One entry per resource, in the order listed above.
  let ReleaseAtCycles = [1, 1, 1];
}
def : InstRW<[Zn4WriteVPERM2F128rm],
             (instrs VPERM2F128rm)>;
13861ac55f4cSDimitry Andric
// VPERMPS (YMM), register form: 2 micro-ops on Zn4FPVShuf.
def Zn4WriteVPERMPSYrr : SchedWriteRes<[Zn4FPVShuf]> {
  let NumMicroOps = 2;
  let Latency = 7;
  let ReleaseAtCycles = [1];
}
def : InstRW<[Zn4WriteVPERMPSYrr],
             (instrs VPERMPSYrr)>;

// VPERMPS (YMM), memory form: the register form plus Zn4AGU012 and Zn4Load.
// The load adds the model's load latency and one extra micro-op.
def Zn4WriteVPERMPSYrm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
  let NumMicroOps = !add(Zn4WriteVPERMPSYrr.NumMicroOps, 1);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMPSYrr.Latency);
  // One entry per resource, in the order listed above.
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[Zn4WriteVPERMPSYrm],
             (instrs VPERMPSYrm)>;
14001ac55f4cSDimitry Andric
// VPERMPD / VPERMQ (YMM), register + immediate forms: 2 micro-ops on
// Zn4FPVShuf.
def Zn4WriteVPERMYri : SchedWriteRes<[Zn4FPVShuf]> {
  let NumMicroOps = 2;
  let Latency = 6;
  let ReleaseAtCycles = [1];
}
def : InstRW<[Zn4WriteVPERMYri],
             (instrs VPERMPDYri, VPERMQYri)>;

// VPERMPD (YMM), memory + immediate form: the register form plus Zn4AGU012
// and Zn4Load. The load adds the model's load latency and one extra micro-op.
def Zn4WriteVPERMPDYmi : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
  let NumMicroOps = !add(Zn4WriteVPERMYri.NumMicroOps, 1);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMYri.Latency);
  // One entry per resource, in the order listed above.
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[Zn4WriteVPERMPDYmi],
             (instrs VPERMPDYmi)>;
14141ac55f4cSDimitry Andric
// VPERMD (YMM), register form: 2 micro-ops on Zn4FPVShuf.
def Zn4WriteVPERMDYrr : SchedWriteRes<[Zn4FPVShuf]> {
  let NumMicroOps = 2;
  let Latency = 5;
  let ReleaseAtCycles = [1];
}
def : InstRW<[Zn4WriteVPERMDYrr],
             (instrs VPERMDYrr)>;

// VPERMQ (memory + immediate) and VPERMD (memory) YMM forms. Latency and
// micro-op count are both derived from Zn4WriteVPERMDYrr, with the model's
// load latency added.
def Zn4WriteVPERMYm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
  let NumMicroOps = !add(Zn4WriteVPERMDYrr.NumMicroOps, 0);
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMDYrr.Latency);
  // One entry per resource, in the order listed above.
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[Zn4WriteVPERMYm],
             (instrs VPERMQYmi, VPERMDYrm)>;
14281ac55f4cSDimitry Andric
// 256-bit width packed vector width-changing move.
defm : Zn4WriteResYMMPair<WriteVPMOV256, [Zn4FPVShuf01],
                          4, [3], 2, /*LoadUOps=*/-1>;
// 256-bit width vector variable shuffles.
defm : Zn4WriteResYMMPair<WriteVarShuffle256, [Zn4FPVShuf01],
                          1, [1], 2>;
// Variable vector shifts.
defm : Zn4WriteResXMMPair<WriteVarVecShift, [Zn4FPVShift01],
                          1, [1], 1>;
// Variable vector shifts (YMM).
defm : Zn4WriteResYMMPair<WriteVarVecShiftY, [Zn4FPVShift01],
                          1, [1], 1>;
// Variable vector shifts (ZMM).
defm : Zn4WriteResZMMPair<WriteVarVecShiftZ, [Zn4FPVShift01],
                          1, [2], 2>;
14341ac55f4cSDimitry Andric
// Old microcoded instructions that nobody uses.
defm : Zn4WriteResInt<WriteMicrocoded, [Zn4ALU0123],
                      100, [100], 100>;

// Fence instructions.
defm : Zn4WriteResInt<WriteFence, [Zn4ALU0123],
                      1, [100], 1>;
14401ac55f4cSDimitry Andric
// LFENCE: a single micro-op on Zn4LSU; the resource entry is 30 cycles.
def Zn4WriteLFENCE : SchedWriteRes<[Zn4LSU]> {
  let NumMicroOps = 1;
  let Latency = 1;
  let ReleaseAtCycles = [30];
}
def : InstRW<[Zn4WriteLFENCE],
             (instrs LFENCE)>;

// SFENCE: a single micro-op on Zn4LSU; the resource entry is 1 cycle.
def Zn4WriteSFENCE : SchedWriteRes<[Zn4LSU]> {
  let NumMicroOps = 1;
  let Latency = 1;
  let ReleaseAtCycles = [1];
}
def : InstRW<[Zn4WriteSFENCE],
             (instrs SFENCE)>;
14541ac55f4cSDimitry Andric
// Nop, not very useful except it provides a model for nops!
// FIXME: latency not from llvm-exegesis
defm : Zn4WriteResInt<WriteNop, [Zn4ALU0123],
                      0, [1], 1>;
14571ac55f4cSDimitry Andric
14581ac55f4cSDimitry Andric
14591ac55f4cSDimitry Andric///////////////////////////////////////////////////////////////////////////////
14601ac55f4cSDimitry Andric// Zero Cycle Move
14611ac55f4cSDimitry Andric///////////////////////////////////////////////////////////////////////////////
14621ac55f4cSDimitry Andric
// Register-to-register moves modeled with zero latency and no execution
// resources (empty resource list), costing one micro-op.
def Zn4WriteZeroLatency : SchedWriteRes<[]> {
  let NumMicroOps = 1;
  let Latency = 0;
  let ReleaseAtCycles = [];
}
def : InstRW<[Zn4WriteZeroLatency],
             (instrs MOV32rr, MOV32rr_REV,
                     MOV64rr, MOV64rr_REV,
                     MOVSX32rr32)>;

// Register exchanges modeled the same way (zero latency, no execution
// resources), but costing two micro-ops.
def Zn4WriteSwapRenameable : SchedWriteRes<[]> {
  let NumMicroOps = 2;
  let Latency = 0;
  let ReleaseAtCycles = [];
}
def : InstRW<[Zn4WriteSwapRenameable],
             (instrs XCHG32rr, XCHG32ar,
                     XCHG64rr, XCHG64ar)>;
14791ac55f4cSDimitry Andric
// Compare+Exchange - TODO RMW support.
defm : Zn4WriteResInt<WriteXCHG, [Zn4ALU0123],
                      0, [8], 2>;

// FP register moves: zero latency, no execution resources, one micro-op.
// NOTE(review): WriteFMoveZ is instantiated through Zn4WriteResYMM rather
// than a ZMM multiclass - confirm this is intentional.
defm : Zn4WriteResXMM<WriteFMoveX, [], 0, [], 1>;
defm : Zn4WriteResYMM<WriteFMoveY, [], 0, [], 1>;
defm : Zn4WriteResYMM<WriteFMoveZ, [], 0, [], 1>;

// MMX vector move: this one does use a resource (Zn4FPFMisc0123) and has a
// latency of 1.
defm : Zn4WriteResXMM<WriteVecMove, [Zn4FPFMisc0123],
                      1, [1], 1>;
// Integer vector register moves: zero latency, no execution resources.
// NOTE(review): WriteVecMoveZ also goes through Zn4WriteResYMM - confirm.
defm : Zn4WriteResXMM<WriteVecMoveX, [], 0, [], 1>;
defm : Zn4WriteResYMM<WriteVecMoveY, [], 0, [], 1>;
defm : Zn4WriteResYMM<WriteVecMoveZ, [], 0, [], 1>;
14901ac55f4cSDimitry Andric
// Opcodes registered as optimizable register moves for this model. A single
// equivalence class is used, guarded by TruePred.
def : IsOptimizableRegisterMove<[
  InstructionEquivalenceClass<[
    // GPR variants.
    MOV32rr,
    MOV32rr_REV,
    MOV64rr,
    MOV64rr_REV,
    MOVSX32rr32,
    XCHG32rr,
    XCHG32ar,
    XCHG64rr,
    XCHG64ar,

    // MMX variants.
    // MMX moves are *NOT* eliminated.

    // SSE variants.
    MOVAPSrr,
    MOVAPSrr_REV,
    MOVUPSrr,
    MOVUPSrr_REV,
    MOVAPDrr,
    MOVAPDrr_REV,
    MOVUPDrr,
    MOVUPDrr_REV,
    MOVDQArr,
    MOVDQArr_REV,
    MOVDQUrr,
    MOVDQUrr_REV,

    // AVX variants.
    VMOVAPSrr,
    VMOVAPSrr_REV,
    VMOVUPSrr,
    VMOVUPSrr_REV,
    VMOVAPDrr,
    VMOVAPDrr_REV,
    VMOVUPDrr,
    VMOVUPDrr_REV,
    VMOVDQArr,
    VMOVDQArr_REV,
    VMOVDQUrr,
    VMOVDQUrr_REV,

    // AVX YMM variants.
    VMOVAPSYrr,
    VMOVAPSYrr_REV,
    VMOVUPSYrr,
    VMOVUPSYrr_REV,
    VMOVAPDYrr,
    VMOVAPDYrr_REV,
    VMOVUPDYrr,
    VMOVUPDYrr_REV,
    VMOVDQAYrr,
    VMOVDQAYrr_REV,
    VMOVDQUYrr,
    VMOVDQUYrr_REV,
  ], TruePred >
]>;
15281ac55f4cSDimitry Andric
// FIXUP and RANGE Instructions
// Register forms of VFIXUPIMM* and VRANGE*: one micro-op on Zn4FPFMisc01.
def Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr : SchedWriteRes<[Zn4FPFMisc01]> {
  let NumMicroOps = 1;
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr],
             (instregex
               "VFIXUPIMM(S|P)(S|D)(Z|Z128|Z256?)rrik",
               "VFIXUPIMM(S|P)(S|D)(Z?|Z128?|Z256?)rrikz",
               "VFIXUPIMM(S|P)(S|D)(Z128|Z256?)rri",
               "VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)",
               "VRANGE(S|P)(S|D)(Z|Z128|Z256?)rri(b?)k",
               "VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)kz")>;
15401ac55f4cSDimitry Andric
// SCALE & REDUCE instructions
// Register forms of VSCALEF* / VREDUCE*: 2 micro-ops on Zn4FPFMisc23.
def Zn4WriteSCALErr : SchedWriteRes<[Zn4FPFMisc23]> {
  let NumMicroOps = 2;
  let Latency = 6;
  let ReleaseAtCycles = [6];
}
def : InstRW<[Zn4WriteSCALErr],
             (instregex
               "V(SCALEF|REDUCE)(S|P)(S|D)(Z?|Z128?|Z256?)(rr|rrb|rrkz|rrik|rrikz|rri)(_Int?|_Intkz?)",
               "(V?)REDUCE(PD|PS|SD|SS)(Z?|Z128?)(rri|rrikz|rrib)")>;
15511ac55f4cSDimitry Andric
// BF16PS Instructions
// Register forms of DPBF16PS: 2 micro-ops on Zn4FPFMisc23.
def Zn4WriteBF16 : SchedWriteRes<[Zn4FPFMisc23]> {
  let NumMicroOps = 2;
  let Latency = 6;
  let ReleaseAtCycles = [6];
}
def : InstRW<[Zn4WriteBF16],
             (instregex
               "(V?)DPBF16PS(Z?|Z128?|Z256?)(r|rk|rkz)")>;
15611ac55f4cSDimitry Andric
// BUSD and VPMADD Instructions
// Register forms of VPDP* and VPMADD52*: one micro-op on Zn4FPFMisc01.
def Zn4WriteBUSDr_VPMADDr : SchedWriteRes<[Zn4FPFMisc01]> {
  let NumMicroOps = 1;
  let Latency = 4;
  let ReleaseAtCycles = [4];
}
def : InstRW<[Zn4WriteBUSDr_VPMADDr],
             (instregex
               "VPDP(BU|WS)(S|P)(S|D|DS)(Z|Z128|Z256)(r|rk|rkz)",
               "VPMADD52(H|L)UQ(Z|Z128|Z256)(r|rk|rkz)")>;
15721ac55f4cSDimitry Andric
// SHIFT instructions
// Register-operand shift, rotate, byte-shuffle and related forms: one
// micro-op on Zn4FPFMisc01.
def Zn4WriteSHIFTrr : SchedWriteRes<[Zn4FPFMisc01]> {
  let NumMicroOps = 1;
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
// NOTE(review): the final pattern, "VFMSUB231SSZr_Intkz", names an FMA opcode
// inside the shift group - confirm that it belongs here.
def : InstRW<[Zn4WriteSHIFTrr],
             (instregex
               "VP(LZCNT|SHLD|SHRD?)(D|Q|W|VD|VQ|VW?)(Z?|Z128?|Z256?)(rr|rk|rrk|rrkz|rri|rrik|rrikz)",
               "(V?)P(SLL|SRL|SRA)(D|Q|W|DQ)(Y?|Z?|Z128?|Z256?)(rr|rrk|rrkz)",
               "(V?)P(SLL|SRL|SRA)DQYri",
               "(V?)P(SLL|SRL)DQ(Z?|Z256?)ri",
               "(V?)P(SHUFB)(Y|Z|Z128|Z256?)(rr|rrk|rrkz)",
               "(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z?|Z128?|Z256?)(rr|rrk|rrkz)",
               "(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z256?)(ri|rik|rikz)",
               "(V?)P(ROL|ROR)(D|Q)(Z?|Z128?)(ri|rik|rikz)",
               "VPSHUFBITQMBZ128rr",
               "VFMSUB231SSZr_Intkz")>;
15901ac55f4cSDimitry Andric
// Immediate-count VPSLL/VPSRL/VPSRA forms: one micro-op on Zn4FPFMisc01.
def Zn4WriteSHIFTri : SchedWriteRes<[Zn4FPFMisc01]> {
  let NumMicroOps = 1;
  let Latency = 1;
  let ReleaseAtCycles = [1];
}
def : InstRW<[Zn4WriteSHIFTri],
             (instregex
               "VP(SLL|SRL|SRA)(D|Q|W)(Z|Z128|Z256?)(ri|rik|rikz)")>;
15991ac55f4cSDimitry Andric
// ALIGN Instructions
// Register + immediate forms of PALIGNR: one micro-op on Zn4FPFMisc12.
def Zn4WriteALIGN : SchedWriteRes<[Zn4FPFMisc12]> {
  let NumMicroOps = 1;
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4WriteALIGN],
             (instregex
               "(V?)PALIGNR(Z?|Z128?|Z256?)(rri|rrik|rrikz)")>;
16091ac55f4cSDimitry Andric
// PACK Instructions
// Register forms of PACKSS* / PACKUS*: one micro-op on Zn4FPFMisc12.
def Zn4WritePACK : SchedWriteRes<[Zn4FPFMisc12]> {
  let NumMicroOps = 1;
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4WritePACK],
             (instregex
               "(V?)PACK(SS|US)(DW|WB)(Z?|Z128?|Z256?)(rr|rrk|rrkz)")>;
16191ac55f4cSDimitry Andric
// MAX and MIN Instructions
// Register forms of CMP*, MAX*/MIN* and VPMAX/VPMIN (SQ|UQ): one micro-op on
// Zn4FPFMisc01.
def Zn4WriteFCmp64 : SchedWriteRes<[Zn4FPFMisc01]> {
  let NumMicroOps = 1;
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4WriteFCmp64],
             (instregex
               "(V?)CMP(S|P)(S|D)(rr|rri|rr_Int)",
               "(V?|VP?)(MAX|MIN|MINC|MAXC)(S|P|U)(S|D|Q)(Z?|Z128?|Z256?)(rr|rri|rrk|rrkz)(_Int?)",
               "VP(MAX|MIN)(SQ|UQ)(Z|Z128|Z256)(rr|rrk|rrkz)",
               "(V?)(MAX|MAXC|MIN|MINC)PD(Z|Z128|Z256?)(rr|rrk|rrkz)")>;
16321ac55f4cSDimitry Andric
// MOV Instructions
// Register forms of VMOVDDUP with a Z / Z128 / Z256 suffix (plain, rrk and
// rrkz): one micro-op on Zn4FPFMisc12.
def Zn4MOVDUPZ: SchedWriteRes<[Zn4FPFMisc12]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 1;
}
// The pattern previously began with an optional extra "V" ("(V?)VMOVDDUP..."),
// which could only ever match a doubled-V name. It is dropped so the pattern
// starts with the literal "VMOVDDUP"; the matched set is expected to be
// unchanged.
def : InstRW<[Zn4MOVDUPZ], (instregex
        "VMOVDDUP(Z|Z128|Z256)(rr|rrk|rrkz)"
        )>;
1642*0fca6ea1SDimitry Andric
// Register forms of the PMOV* sign/zero-extend and truncating moves matched
// below: one micro-op on Zn4FPFMisc12. Note the latency (2) differs from the
// resource entry (1).
def Zn4MOVS : SchedWriteRes<[Zn4FPFMisc12]> {
  let NumMicroOps = 1;
  let Latency = 2;
  let ReleaseAtCycles = [1];
}
def : InstRW<[Zn4MOVS],
             (instregex
               "(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)(Z128?|Z256?)(rr|rrk|rrkz)",
               "(V?)PMOV(SX|QD|UZ|ZX)(BD|BQ|BW?)(Y|Z128?)(rr|rrk|rrkz)",
               "(V?)PMOV(SX|US|ZX)(DQ|WD|QW|WQ?)(Y|Z128?)(rr|rrk|rrkz)",
               "VPMOV(DB|DW|QB|QD|QW|SDB|SDW|SQB|SQD|SQW|SWB|USDB|USDW|USQB|USQD|USWB|WB)(Z128?|Z256?)(rr|rrk|rrkz)")>;
16541ac55f4cSDimitry Andric
// PMOVSX / PMOVZX register forms whose width suffix is "Z" or absent: one
// micro-op on Zn4FPFMisc12.
def Zn4MOVSZ : SchedWriteRes<[Zn4FPFMisc12]> {
  let NumMicroOps = 1;
  let Latency = 4;
  let ReleaseAtCycles = [4];
}
def : InstRW<[Zn4MOVSZ],
             (instregex
               "(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)(Z?)(rr|rrk|rrkz)")>;
16631ac55f4cSDimitry Andric
// PMOV truncating register forms (DB, QB, QW and their S / US variants) whose
// width suffix is "Z" or absent: one micro-op on Zn4FPFMisc12.
def Zn4MOVSrr : SchedWriteRes<[Zn4FPFMisc12]> {
  let NumMicroOps = 1;
  let Latency = 5;
  let ReleaseAtCycles = [5];
}
def : InstRW<[Zn4MOVSrr],
             (instregex
               "(V?)PMOV(DB|QB|QW|SDB|SQB|SQW|USDB|USQB|USQW)(Z?)(rr|rrk|rrkz)")>;
16721ac55f4cSDimitry Andric
16731ac55f4cSDimitry Andric
// VPTEST Instructions
// Three classes on Zn4FPFMisc01, one micro-op each, differing only in
// latency / resource entry (3, 4, 5) and in the width suffix they match.

// Z128 suffix, rrk form.
def Zn4VPTESTZ128 : SchedWriteRes<[Zn4FPFMisc01]> {
  let NumMicroOps = 1;
  let Latency = 3;
  let ReleaseAtCycles = [3];
}
def : InstRW<[Zn4VPTESTZ128],
             (instregex
               "(V?)PTEST(N?)(MB|MD|MQ|MW)(Z128?)(rrk)")>;

// Z256 suffix, rr and rrk forms.
def Zn4VPTESTZ256 : SchedWriteRes<[Zn4FPFMisc01]> {
  let NumMicroOps = 1;
  let Latency = 4;
  let ReleaseAtCycles = [4];
}
def : InstRW<[Zn4VPTESTZ256],
             (instregex
               "(V?)PTEST(N?)(MB|MD|MQ|MW)(Z256?)(rr|rrk)")>;

// Z suffix (or none), rrk form.
def Zn4VPTESTZ : SchedWriteRes<[Zn4FPFMisc01]> {
  let NumMicroOps = 1;
  let Latency = 5;
  let ReleaseAtCycles = [5];
}
def : InstRW<[Zn4VPTESTZ],
             (instregex
               "(V?)PTEST(N?)(MB|MD|MQ|MW)(Z?)(rrk)")>;
17011ac55f4cSDimitry Andric
// CONFLICT Instructions
// VPCONFLICT, Z128 register forms: one micro-op on Zn4FPFMisc01.
def Zn4CONFLICTZ128 : SchedWriteRes<[Zn4FPFMisc01]> {
  let NumMicroOps = 1;
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4CONFLICTZ128],
             (instregex
               "VPCONFLICT(D|Q)(Z128)(rr|rrk|rrkz)")>;

// VPCONFLICT, Z and Z256 register forms (rr and rrkz): 4 micro-ops spread
// over three resource groups, each with a resource entry of 2.
def Zn4CONFLICTrr : SchedWriteRes<[Zn4FPFMisc01, Zn4FPFMisc12, Zn4FPFMisc23]> {
  let NumMicroOps = 4;
  let Latency = 6;
  let ReleaseAtCycles = [2, 2, 2];
}
def : InstRW<[Zn4CONFLICTrr],
             (instregex
               "VPCONFLICT(D|Q)(Z|Z256)(rr|rrkz)")>;
17201ac55f4cSDimitry Andric
// RSQRT Instructions
// Register forms of VRSQRT14PD / VRSQRT14PS: one micro-op on Zn4FPFMisc01.
// Despite the record name, the pattern is not limited to the Z256 forms.
def Zn4VRSQRT14PDZ256 : SchedWriteRes<[Zn4FPFMisc01]> {
  let NumMicroOps = 1;
  let Latency = 5;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4VRSQRT14PDZ256],
             (instregex
               "VRSQRT14(PD|PS)(Z?|Z128?|Z256?)(r|rr|rk|rrk|rkz|rrkz)")>;
17301ac55f4cSDimitry Andric
17311ac55f4cSDimitry Andric
// PERM Instructions
// Register forms of VPERMILPS / VPERMILPD (Y, Z, Z128, Z256): one micro-op
// on Zn4FPFMisc123.
def Zn4PERMILP : SchedWriteRes<[Zn4FPFMisc123]> {
  let NumMicroOps = 1;
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4PERMILP],
             (instregex
               "VPERMILP(S|D)(Y|Z|Z128|Z256)(rr|rrk|rrkz)")>;
17411ac55f4cSDimitry Andric
// VPERMI2* / VPERMT2*, Z128 register forms: one micro-op on Zn4FPFMisc12,
// latency 3.
def Zn4PERMIT2_128 : SchedWriteRes<[Zn4FPFMisc12]> {
  let NumMicroOps = 1;
  let Latency = 3;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4PERMIT2_128],
             (instregex
               "VPERM(I2|T2)(PS|PD|W)Z128(rr|rrk|rrkz)",
               "VPERM(I2|T2)(B|D|Q)Z128(rr|rrk|rrkz)")>;

// V(P)COMPRESS* Z128 and VPERM(B|D|Q|W) register forms: one micro-op on
// Zn4FPFMisc12, latency 2.
def Zn4PERMIT2_128rr : SchedWriteRes<[Zn4FPFMisc12]> {
  let NumMicroOps = 1;
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4PERMIT2_128rr],
             (instregex
               "V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z128(rr|rrk|rrkz)",
               "VPERM(B|D|Q|W)(Z128?)(rr|rrk|rrkz)")>;
17611ac55f4cSDimitry Andric
// Z256 register forms of the permute, compress and expand instructions
// matched below: one micro-op on Zn4FPFMisc12, latency 4.
def Zn4PERMIT2_256 : SchedWriteRes<[Zn4FPFMisc12]> {
  let NumMicroOps = 1;
  let Latency = 4;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4PERMIT2_256],
             (instregex
               "VPERM(I2|T2)(PS|PD|W)Z256(rr|rrk|rrkz)",
               "VPERMP(S|D)Z256(rr|rrk|rrkz)",
               "V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z256(rr|rrk|rrkz)",
               "VPERM(B|D|Q|W)Z256(rr|rrk|rrkz)",
               "VPERM(I2|Q|T2)(B|D|Q)Z256(rr|rrk|rrkz)",
               "VPEXPAND(B|W)Z256(rr|rrk|rrkz)")>;
17751ac55f4cSDimitry Andric
// Z-suffixed (no explicit width) permute, compress and byte/word expand
// patterns (rr/rrk/rrkz opcodes): one micro-op on Zn4FPFMisc12 with a
// latency of 5.
def Zn4PERMIT2Z : SchedWriteRes<[Zn4FPFMisc12]> {
  let NumMicroOps = 1;
  let Latency = 5;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4PERMIT2Z], (instregex
    // VPERMI2* / VPERMT2* patterns.
    "VPERM(I2|T2)(PS|PD|W)Z(rr|rrk|rrkz)",
    "VPERM(I2|Q|T2)(B|D|Q)Z(rr|rrk|rrkz)",
    // VPERMPS/VPERMPD and VPERM{B,D,W} patterns.
    "VPERMP(S|D)Z(rr|rrk|rrkz)",
    "VPERM(B|D|W)Z(rr|rrk|rrkz)",
    // Compress and expand patterns.
    "V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z(rr|rrk|rrkz)",
    "VPEXPAND(B|W)Z(rr|rrk|rrkz)"
    )>;
17891ac55f4cSDimitry Andric
17901ac55f4cSDimitry Andric// ALU SLOW Misc Instructions
// Z128 rr/rrk/rrkz opcodes of VPABS*, VPADDS*/VPADDUS*, VPAVG*, VPOPCNT* and
// VPSUBS*/VPSUBUS*: one micro-op on Zn4FPFMisc01 with a latency of 2.
def Zn4VecALUZSlow : SchedWriteRes<[Zn4FPFMisc01]> {
  let NumMicroOps = 1;
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[Zn4VecALUZSlow], (instrs
    // Absolute value.
    VPABSBZ128rr,    VPABSBZ128rrk,    VPABSBZ128rrkz,
    VPABSDZ128rr,    VPABSDZ128rrk,    VPABSDZ128rrkz,
    VPABSQZ128rr,    VPABSQZ128rrk,    VPABSQZ128rrkz,
    VPABSWZ128rr,    VPABSWZ128rrk,    VPABSWZ128rrkz,
    // VPADDS* / VPADDUS*.
    VPADDSBZ128rr,   VPADDSBZ128rrk,   VPADDSBZ128rrkz,
    VPADDSWZ128rr,   VPADDSWZ128rrk,   VPADDSWZ128rrkz,
    VPADDUSBZ128rr,  VPADDUSBZ128rrk,  VPADDUSBZ128rrkz,
    VPADDUSWZ128rr,  VPADDUSWZ128rrk,  VPADDUSWZ128rrkz,
    // VPAVG*.
    VPAVGBZ128rr,    VPAVGBZ128rrk,    VPAVGBZ128rrkz,
    VPAVGWZ128rr,    VPAVGWZ128rrk,    VPAVGWZ128rrkz,
    // VPOPCNT*.
    VPOPCNTBZ128rr,  VPOPCNTBZ128rrk,  VPOPCNTBZ128rrkz,
    VPOPCNTDZ128rr,  VPOPCNTDZ128rrk,  VPOPCNTDZ128rrkz,
    VPOPCNTQZ128rr,  VPOPCNTQZ128rrk,  VPOPCNTQZ128rrkz,
    VPOPCNTWZ128rr,  VPOPCNTWZ128rrk,  VPOPCNTWZ128rrkz,
    // VPSUBS* / VPSUBUS*.
    VPSUBSBZ128rr,   VPSUBSBZ128rrk,   VPSUBSBZ128rrkz,
    VPSUBSWZ128rr,   VPSUBSWZ128rrk,   VPSUBSWZ128rrkz,
    VPSUBUSBZ128rr,  VPSUBUSBZ128rrk,  VPSUBUSBZ128rrkz,
    VPSUBUSWZ128rr,  VPSUBUSWZ128rrk,  VPSUBUSWZ128rrkz
    )>;
18121ac55f4cSDimitry Andric
18131ac55f4cSDimitry Andric
18141ac55f4cSDimitry Andric///////////////////////////////////////////////////////////////////////////////
18151ac55f4cSDimitry Andric// Dependency breaking instructions.
18161ac55f4cSDimitry Andric///////////////////////////////////////////////////////////////////////////////
18171ac55f4cSDimitry Andric
// 32/64-bit register-register XOR and SUB: Zn4WriteZeroLatency when
// ZeroIdiomPredicate holds, WriteALU otherwise.
def Zn4WriteZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteALU]>
]>;
def : InstRW<[Zn4WriteZeroIdiom],
             (instrs XOR32rr, XOR32rr_REV, XOR64rr, XOR64rr_REV,
                     SUB32rr, SUB32rr_REV, SUB64rr, SUB64rr_REV)>;
18261ac55f4cSDimitry Andric
// Register-register CMP of every width: Zn4WriteZeroLatency when operands 0
// and 1 are the same register, WriteALU otherwise.
def Zn4WriteZeroIdiomEFLAGS : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckSameRegOperand<0, 1>>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteALU]>
]>;
def : InstRW<[Zn4WriteZeroIdiomEFLAGS],
             (instrs CMP8rr, CMP8rr_REV, CMP16rr, CMP16rr_REV,
                     CMP32rr, CMP32rr_REV, CMP64rr, CMP64rr_REV)>;
18351ac55f4cSDimitry Andric
// VXORPS/VXORPD/VANDNPS/VANDNPD rr opcodes: Zn4WriteZeroLatency when
// ZeroIdiomPredicate holds, WriteFLogic otherwise.
def Zn4WriteFZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteFLogic]>
]>;
// NOTE: XORPSrr, XORPDrr are not zero-cycle!
def : InstRW<[Zn4WriteFZeroIdiom],
             (instrs VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr)>;
18431ac55f4cSDimitry Andric
// VXORPS/VXORPD/VANDNPS/VANDNPD Yrr opcodes: Zn4WriteZeroLatency when
// ZeroIdiomPredicate holds, WriteFLogicY otherwise.
def Zn4WriteFZeroIdiomY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteFLogicY]>
]>;
def : InstRW<[Zn4WriteFZeroIdiomY],
             (instrs VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr)>;
18501ac55f4cSDimitry Andric
// VPXORrr/VPANDNrr: Zn4WriteZeroLatency when ZeroIdiomPredicate holds,
// WriteVecLogicX otherwise.
def Zn4WriteVZeroIdiomLogicX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecLogicX]>
]>;
// NOTE: PXORrr,PANDNrr are not zero-cycle!
def : InstRW<[Zn4WriteVZeroIdiomLogicX],
             (instrs VPXORrr, VPANDNrr)>;
18571ac55f4cSDimitry Andric
// VPXORYrr/VPANDNYrr: Zn4WriteZeroLatency when ZeroIdiomPredicate holds,
// WriteVecLogicY otherwise.
def Zn4WriteVZeroIdiomLogicY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecLogicY]>
]>;
def : InstRW<[Zn4WriteVZeroIdiomLogicY],
             (instrs VPXORYrr, VPANDNYrr)>;
18631ac55f4cSDimitry Andric
// VPSUB{B,W,D,Q}rr and VPCMPGT{B,W,D,Q}rr: Zn4WriteZeroLatency when
// ZeroIdiomPredicate holds, WriteVecALUX otherwise.
def Zn4WriteVZeroIdiomALUX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecALUX]>
]>;
// NOTE: PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
//       PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr are not zero-cycle!
def : InstRW<[Zn4WriteVZeroIdiomALUX], (instrs
    VPSUBBrr,   VPSUBWrr,   VPSUBDrr,   VPSUBQrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr
    )>;
18731ac55f4cSDimitry Andric
// VPSUB{B,W,D,Q}Yrr and VPCMPGT{B,W,D,Q}Yrr: Zn4WriteZeroLatency when
// ZeroIdiomPredicate holds, WriteVecALUY otherwise.
def Zn4WriteVZeroIdiomALUY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred, [WriteVecALUY]>
]>;
def : InstRW<[Zn4WriteVZeroIdiomALUY], (instrs
    VPSUBBYrr,   VPSUBWYrr,   VPSUBDYrr,   VPSUBQYrr,
    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
    )>;
18811ac55f4cSDimitry Andric
// Opcode groups registered with IsZeroIdiomFunction. Every group is paired
// with ZeroIdiomPredicate.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[
    XOR32rr, XOR32rr_REV, XOR64rr, XOR64rr_REV,
    SUB32rr, SUB32rr_REV, SUB64rr, SUB64rr_REV
  ], ZeroIdiomPredicate>,

  // SSE XMM Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr,  XORPDrr,
    ANDNPSrr, ANDNPDrr,

    // int variants.
    PXORrr,
    PANDNrr,
    PSUBBrr,   PSUBWrr,   PSUBDrr,   PSUBQrr,
    PSUBSBrr,  PSUBSWrr,
    PSUBUSBrr, PSUBUSWrr,
    PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr
  ], ZeroIdiomPredicate>,

  // AVX XMM Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    VXORPSrr,  VXORPDrr,
    VANDNPSrr, VANDNPDrr,

    // int variants.
    VPXORrr,
    VPANDNrr,
    VPSUBBrr,   VPSUBWrr,   VPSUBDrr,   VPSUBQrr,
    VPSUBSBrr,  VPSUBSWrr,
    VPSUBUSBrr, VPSUBUSWrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr
  ], ZeroIdiomPredicate>,

  // AVX YMM Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    VXORPSYrr,  VXORPDYrr,
    VANDNPSYrr, VANDNPDYrr,

    // int variants.
    VPXORYrr,
    VPANDNYrr,
    VPSUBBYrr,   VPSUBWYrr,   VPSUBDYrr,   VPSUBQYrr,
    VPSUBSBYrr,  VPSUBSWYrr,
    VPSUBUSBYrr, VPSUBUSWYrr,
    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
  ], ZeroIdiomPredicate>
]>;
19341ac55f4cSDimitry Andric
// Opcode groups registered with IsDepBreakingFunction. The CMP group is
// paired with CheckSameRegOperand<0, 1>; every other group is paired with
// ZeroIdiomPredicate.
def : IsDepBreakingFunction<[
  // GPR
  DepBreakingClass<[
    SBB32rr, SBB32rr_REV,
    SBB64rr, SBB64rr_REV
  ], ZeroIdiomPredicate>,
  DepBreakingClass<[
    CMP8rr,  CMP8rr_REV,
    CMP16rr, CMP16rr_REV,
    CMP32rr, CMP32rr_REV,
    CMP64rr, CMP64rr_REV
  ], CheckSameRegOperand<0, 1>>,

  // SSE
  DepBreakingClass<[
    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
  ], ZeroIdiomPredicate>,

  // AVX XMM
  DepBreakingClass<[
    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
  ], ZeroIdiomPredicate>,

  // AVX YMM
  DepBreakingClass<[
    VPCMPEQBYrr, VPCMPEQWYrr, VPCMPEQDYrr, VPCMPEQQYrr
  ], ZeroIdiomPredicate>
]>;
19581ac55f4cSDimitry Andric
19591ac55f4cSDimitry Andric} // SchedModel
19601ac55f4cSDimitry Andric
1961