//=- X86ScheduleZnver4.td - X86 Znver4 Scheduling ------------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Znver4 to support instruction
// scheduling and other instruction cost heuristics.
// Based on:
//  * AMD Software Optimization Guide for AMD Family 19h Processors.
//    https://www.amd.com/system/files/TechDocs/56665.zip
//===----------------------------------------------------------------------===//

// Global parameters of the Znver4 machine model. Besides overriding the
// standard SchedMachineModel fields, it declares two extra integer fields
// (VecLoadLatency, StoreLatency) that the rest of this file reads back as
// Znver4Model.<field>.
def Znver4Model : SchedMachineModel {
  // AMD SOG 19h, 2.9.6 Dispatch
  //   Up to 6 macro ops per cycle may be dispatched into the execution engine.
  let IssueWidth = 6;

  // AMD SOG 19h, 2.10.3
  //   The retire control unit (RCU) tracks the completion status of all
  //   outstanding operations (integer, load/store, and floating-point) and is
  //   the final arbiter for exception processing and recovery. It can receive
  //   up to 6 macro ops dispatched per cycle and track up to 320 macro ops
  //   in-flight in non-SMT mode or 160 per thread in SMT mode.
  let MicroOpBufferSize = 320;

  // AMD SOG 19h, 2.9.1 Op Cache
  //   The op cache is organized as an associative cache with 64 sets and
  //   8 ways. At each set-way intersection is an entry containing up to
  //   8 macro ops. The maximum capacity of the op cache is 6.75K ops.
  // Assuming a maximum dispatch of 9 ops/cy and a mispredict cost of 12cy from
  // the op-cache, the loop buffer is limited to 9*12 = 108 so that loop
  // unrolling does not lead to excessive filling of the op-cache from the
  // frontend.
  let LoopMicroOpBufferSize = 108;

  // AMD SOG 19h, 2.6.2 L1 Data Cache
  //   The L1 data cache has a 4- or 5- cycle integer load-to-use latency.
  // AMD SOG 19h, 2.12 L1 Data Cache
  //   The AGU and LS pipelines are optimized for simple address generation
  //   modes. <...> and can achieve 4-cycle load-to-use integer load latency.
  let LoadLatency = 4;

  // AMD SOG 19h, 2.12 L1 Data Cache
  //   The AGU and LS pipelines are optimized for simple address generation
  //   modes. <...> and can achieve <...> 7-cycle load-to-use FP load latency.
  int VecLoadLatency = 7;

  // Latency of a simple store operation.
  int StoreLatency = 1;

  // FIXME: any better choice?
  let HighLatency = 25;

  // AMD SOG 19h, 2.8 Optimizing Branching
  //   The branch misprediction penalty is in the range from 11 to 18 cycles,
  //   <...>. The common case penalty is 13 cycles.
  let MispredictPenalty = 13;

  // Enable Post RegAlloc Scheduler pass.
  let PostRAScheduler = 1;

  let CompleteModel = 1;
}

let SchedModel = Znver4Model in {


//===----------------------------------------------------------------------===//
// RCU
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.3 Retire Control Unit
// The unit can receive up to 6 macro ops dispatched per cycle and track up to
// 320 macro ops in-flight in non-SMT mode or 160 per thread in SMT mode. <...>
// The retire unit handles in-order commit of up to nine macro ops per cycle.
// Retire control unit: sized from the model's MicroOpBufferSize, with a
// second argument of 9.
def Zn4RCU : RetireControlUnit<Znver4Model.MicroOpBufferSize, 9>;

//===----------------------------------------------------------------------===//
// Integer Execution Unit
//

// AMD SOG 19h, 2.4 Superscalar Organization
//   The processor uses four decoupled independent integer scheduler queues,
//   each one servicing one ALU pipeline and one or two other pipelines

//
// Execution pipes
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.2 Execution Units
//   The processor contains 4 general purpose integer execution pipes.
//   Each pipe has an ALU capable of general purpose integer operations.
def Zn4ALU0 : ProcResource<1>;
def Zn4ALU1 : ProcResource<1>;
def Zn4ALU2 : ProcResource<1>;
def Zn4ALU3 : ProcResource<1>;

// AMD SOG 19h, 2.10.2 Execution Units
//   There is also a separate branch execution unit.
def Zn4BRU1 : ProcResource<1>;

// AMD SOG 19h, 2.10.2 Execution Units
//   There are three Address Generation Units (AGUs) for all load and store
//   address generation. There are also 3 store data movement units
//   associated with the same schedulers as the AGUs.
// The three address generation units, one resource each.
def Zn4AGU0 : ProcResource<1>;
def Zn4AGU1 : ProcResource<1>;
def Zn4AGU2 : ProcResource<1>;

//
// Execution Units
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.2 Execution Units
//   ALU0 additionally has divide <...> execution capability.
// The divider is therefore an alias of ALU0, not a separate resource.
defvar Zn4Divider = Zn4ALU0;

// AMD SOG 19h, 2.10.2 Execution Units
//   ALU0 additionally has <...> branch execution capability.
// The first branch unit is likewise an alias of ALU0.
defvar Zn4BRU0 = Zn4ALU0;

// Integer Multiplication issued on ALU1.
defvar Zn4Multiplier = Zn4ALU1;

// Execution pipeline grouping
//===----------------------------------------------------------------------===//

// General ALU operations: any of the four ALU pipes.
def Zn4ALU0123 : ProcResGroup<[Zn4ALU0, Zn4ALU1, Zn4ALU2, Zn4ALU3]>;

// General AGU operations: any of the three AGUs.
def Zn4AGU012 : ProcResGroup<[Zn4AGU0, Zn4AGU1, Zn4AGU2]>;

// Control flow: jumps, calls.
def Zn4BRU01 : ProcResGroup<[Zn4BRU0, Zn4BRU1]>;

// Everything that isn't control flow, but still needs to access CC register,
// namely: conditional moves, SETcc.
// Group of ALU0 and ALU3.
def Zn4ALU03 : ProcResGroup<[Zn4ALU0, Zn4ALU3]>;

// Zn4ALU1 handles complex bit twiddling: CRC/PDEP/PEXT

// Simple bit twiddling: bit test, shift/rotate, bit extraction
def Zn4ALU12 : ProcResGroup<[Zn4ALU1, Zn4ALU2]>;


//
// Scheduling
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.3 Retire Control Unit
//   The integer physical register file (PRF) consists of 224 registers.
def Zn4IntegerPRF : RegisterFile<224,
                                 [GR64, CCR],
                                 [1, 1],
                                 [1, 0],
                                 6,  // Max moves that can be eliminated per cycle.
                                 0>; // Restrict move elimination to zero regs.

// anandtech, The integer scheduler has a 4*24 entry macro op capacity.
// AMD SOG 19h, 2.10.1 Schedulers
//   The schedulers can receive up to six macro ops per cycle, with a limit of
//   two per scheduler. Each scheduler can issue one micro op per cycle into
//   each of its associated pipelines
// All integer pipes are placed in one group whose buffer is the combined
// 4*24 capacity quoted above.
def Zn4Int : ProcResGroup<[
    Zn4ALU0, Zn4AGU0, Zn4BRU0, // scheduler 0
    Zn4ALU1, Zn4AGU1,          // scheduler 1
    Zn4ALU2, Zn4AGU2,          // scheduler 2
    Zn4ALU3, Zn4BRU1           // scheduler 3
  ]> {
  let BufferSize = !mul(4, 24);
}


//===----------------------------------------------------------------------===//
// Floating-Point Unit
//

// AMD SOG 19h, 2.4 Superscalar Organization
//   The processor uses <...> two decoupled independent floating point
//   schedulers each servicing two FP pipelines and one store or
//   FP-to-integer pipeline.

//
// Execution pipes
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.10.1 Schedulers
//   <...>, and six FPU pipes.
// Agner, 22.10 Floating point execution pipes
//   There are six floating point/vector execution pipes.
// Pipes 0-3 are separate single-unit resources; the remaining two pipes are
// declared together as the single resource Zn4FP45 (ProcResource<2>).
def Zn4FP0  : ProcResource<1>;
def Zn4FP1  : ProcResource<1>;
def Zn4FP2  : ProcResource<1>;
def Zn4FP3  : ProcResource<1>;
def Zn4FP45 : ProcResource<2>;

//
// Execution Units
//===----------------------------------------------------------------------===//
// AMD SOG 19h, 2.11.1 Floating Point Execution Resources

// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ): pipes 0 and 1.
defvar Zn4FPFMul0 = Zn4FP0;
defvar Zn4FPFMul1 = Zn4FP1;

// (v)FADD*: pipes 2 and 3.
defvar Zn4FPFAdd0 = Zn4FP2;
defvar Zn4FPFAdd1 = Zn4FP3;

// All convert operations except pack/unpack: pipes 2 and 3.
defvar Zn4FPFCvt0 = Zn4FP2;
defvar Zn4FPFCvt1 = Zn4FP3;

// All Divide and Square Root except Reciprocal Approximation
// AMD SOG 19h, 2.11.1 Floating Point Execution Resources
//   FDIV unit can support 2 simultaneous operations in flight
//   even though it occupies a single pipe.
// FIXME: BufferSize=2 ?
// The FP divide/square-root unit is an alias of pipe 1.
defvar Zn4FPFDiv = Zn4FP1;

// Moves and Logical operations on Floating Point Data Types: pipes 0-3.
defvar Zn4FPFMisc0 = Zn4FP0;
defvar Zn4FPFMisc1 = Zn4FP1;
defvar Zn4FPFMisc2 = Zn4FP2;
defvar Zn4FPFMisc3 = Zn4FP3;

// Integer Adds, Subtracts, and Compares: pipes 0-3.
// Some complex VADD operations are not available in all pipes.
defvar Zn4FPVAdd0 = Zn4FP0;
defvar Zn4FPVAdd1 = Zn4FP1;
defvar Zn4FPVAdd2 = Zn4FP2;
defvar Zn4FPVAdd3 = Zn4FP3;

// Integer Multiplies, SAD, Blendvb: pipes 0 and 3.
defvar Zn4FPVMul0 = Zn4FP0;
defvar Zn4FPVMul1 = Zn4FP3;

// Data Shuffles, Packs, Unpacks, Permute
// Some complex shuffle operations are only available in pipe1.
// Shuffle unit aliases: the main one is pipe 1, the auxiliary one is pipe 2.
defvar Zn4FPVShuf    = Zn4FP1;
defvar Zn4FPVShufAux = Zn4FP2;

// Bit Shift Left/Right operations: pipes 1 and 2.
defvar Zn4FPVShift0 = Zn4FP1;
defvar Zn4FPVShift1 = Zn4FP2;

// Moves and Logical operations on Packed Integer Data Types: pipes 0-3.
defvar Zn4FPVMisc0 = Zn4FP0;
defvar Zn4FPVMisc1 = Zn4FP1;
defvar Zn4FPVMisc2 = Zn4FP2;
defvar Zn4FPVMisc3 = Zn4FP3;

// *AES*: pipes 0 and 1.
defvar Zn4FPAES0 = Zn4FP0;
defvar Zn4FPAES1 = Zn4FP1;

// *CLM*: pipes 0 and 1.
defvar Zn4FPCLM0 = Zn4FP0;
defvar Zn4FPCLM1 = Zn4FP1;

// Execution pipeline grouping
//===----------------------------------------------------------------------===//

// AMD SOG 19h, 2.11 Floating-Point Unit
//   Stores and floating point to general purpose register transfer
//   have 2 dedicated pipelines (pipe 5 and 6).
// This group holds only the four pipes FP0-FP3; Zn4FP45 is not a member.
def Zn4FPU0123 : ProcResGroup<[Zn4FP0, Zn4FP1, Zn4FP2, Zn4FP3]>;

// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
def Zn4FPFMul01 : ProcResGroup<[Zn4FPFMul0, Zn4FPFMul1]>;

// (v)FADD*
// Some complex VADD operations are not available in all pipes.
// Either of the two FP add units.
def Zn4FPFAdd01 : ProcResGroup<[Zn4FPFAdd0, Zn4FPFAdd1]>;

// All convert operations except pack/unpack
def Zn4FPFCvt01 : ProcResGroup<[Zn4FPFCvt0, Zn4FPFCvt1]>;

// All Divide and Square Root except Reciprocal Approximation
// (no group is defined; the definition below is intentionally left disabled)
// def Zn4FPFDiv : ProcResGroup<[Zn4FPFDiv]>;

// Moves and Logical operations on Floating Point Data Types
def Zn4FPFMisc0123 : ProcResGroup<[Zn4FPFMisc0, Zn4FPFMisc1,
                                   Zn4FPFMisc2, Zn4FPFMisc3]>;

// FIXUP and RANGE use FP01 pipelines
def Zn4FPFMisc01 : ProcResGroup<[Zn4FPFMisc0, Zn4FPFMisc1]>;
def Zn4FPFMisc12 : ProcResGroup<[Zn4FPFMisc1, Zn4FPFMisc2]>;
// SCALE instructions use FP23 pipelines
def Zn4FPFMisc23 : ProcResGroup<[Zn4FPFMisc2, Zn4FPFMisc3]>;
def Zn4FPFMisc123 : ProcResGroup<[Zn4FPFMisc1, Zn4FPFMisc2, Zn4FPFMisc3]>;

// Loads, Stores and Move to General Register (EX) Operations
// AMD SOG 19h, 2.11 Floating-Point Unit
//   Stores and floating point to general purpose register transfer
//   have 2 dedicated pipelines (pipe 5 and 6).
// The FP load resource is an alias of the two-unit Zn4FP45 resource.
defvar Zn4FPLd01 = Zn4FP45;

// AMD SOG 19h, 2.11 Floating-Point Unit
//   Note that FP stores are supported on two pipelines,
//   but throughput is limited to one per cycle.
// FP store resource: a single unit whose Super resource is Zn4FP45.
def Zn4FPSt : ProcResource<1> {
  let Super = Zn4FP45;
}

// Integer Adds, Subtracts, and Compares
// Some complex VADD operations are not available in all pipes.
def Zn4FPVAdd0123 : ProcResGroup<[Zn4FPVAdd0, Zn4FPVAdd1,
                                  Zn4FPVAdd2, Zn4FPVAdd3]>;

def Zn4FPVAdd01 : ProcResGroup<[Zn4FPVAdd0, Zn4FPVAdd1]>;
def Zn4FPVAdd12 : ProcResGroup<[Zn4FPVAdd1, Zn4FPVAdd2]>;

// AVX512 Opmask pipelines
// NOTE(review): despite its "01" suffix, Zn4FPOpMask01 is built from Zn4FP2
// and Zn4FP3 - confirm the naming is intended.
def Zn4FPOpMask01 : ProcResGroup<[Zn4FP2, Zn4FP3]>;
def Zn4FPOpMask4 : ProcResGroup<[Zn4FP45]>;

// Integer Multiplies, SAD, Blendvb
def Zn4FPVMul01 : ProcResGroup<[Zn4FPVMul0, Zn4FPVMul1]>;

// Data Shuffles, Packs, Unpacks, Permute
// Some complex shuffle operations are only available in pipe1.
// Either shuffle unit (main or auxiliary).
def Zn4FPVShuf01 : ProcResGroup<[Zn4FPVShuf, Zn4FPVShufAux]>;

// Bit Shift Left/Right operations
def Zn4FPVShift01 : ProcResGroup<[Zn4FPVShift0, Zn4FPVShift1]>;

// Moves and Logical operations on Packed Integer Data Types
def Zn4FPVMisc0123 : ProcResGroup<[Zn4FPVMisc0, Zn4FPVMisc1,
                                   Zn4FPVMisc2, Zn4FPVMisc3]>;

// *AES*
def Zn4FPAES01 : ProcResGroup<[Zn4FPAES0, Zn4FPAES1]>;

// *CLM*
def Zn4FPCLM01 : ProcResGroup<[Zn4FPCLM0, Zn4FPCLM1]>;


//
// Scheduling
//===----------------------------------------------------------------------===//

// Agner, 21.8 Register renaming and out-of-order schedulers
//   The floating point register file has 192 vector registers
//   of 512b each in zen4.
// NOTE(review): four register classes are listed but the move-elimination
// list ([0, 1, 1]) has only three entries, so VR512 has no explicit entry -
// confirm this is intended.
def Zn4FpPRF : RegisterFile<192,
                            [VR64, VR128, VR256, VR512],
                            [1, 1, 1, 1],
                            [0, 1, 1],
                            6,  // Max moves that can be eliminated per cycle.
                            0>; // Restrict move elimination to zero regs.

// AMD SOG 19h, 2.11 Floating-Point Unit
//   The floating-point scheduler has a 2*32 entry macro op capacity.
// AMD SOG 19h, 2.11 Floating-Point Unit
//   <...> the scheduler can issue 1 micro op per cycle for each pipe.
// FIXME: those are two separate schedulers, not a single big one.
// All FP pipes as one group, buffered with the combined 2*32 capacity.
def Zn4FP : ProcResGroup<[
    Zn4FP0, Zn4FP2, /*Zn4FP4,*/        // scheduler 0
    Zn4FP1, Zn4FP3, Zn4FP45 /*Zn4FP5*/ // scheduler 1
  ]> {
  let BufferSize = !mul(2, 32);
}

// AMD SOG 19h, 2.11 Floating-Point Unit
//   Macro ops can be dispatched to the 64 entry Non Scheduling Queue (NSQ)
//   even if floating-point scheduler is full.
// FIXME: how to model this properly?


//===----------------------------------------------------------------------===//
// Load-Store Unit
//

// AMD SOG 19h, 2.12 Load-Store Unit
//   The LS unit contains three largely independent pipe-lines
//   enabling the execution of three 256-bit memory operations per cycle.
def Zn4LSU : ProcResource<3>;

// AMD SOG 19h, 2.12 Load-Store Unit
//   All three memory operations can be loads.
def Zn4Load : ProcResource<3> {
  let Super = Zn4LSU;
  // AMD SOG 19h, 2.12 Load-Store Unit
  //   The LS unit can process up to 72 out-of-order loads.
  let BufferSize = 72;
}

def Zn4LoadQueue : LoadQueue<Zn4Load>;

// AMD SOG 19h, 2.12 Load-Store Unit
//   A maximum of two of the memory operations can be stores.
// Store resource: two units whose Super resource is Zn4LSU.
def Zn4Store : ProcResource<2> {
  let Super = Zn4LSU;
  // AMD SOG 19h, 2.12 Load-Store Unit
  //   The LS unit utilizes a 64-entry store queue (STQ).
  let BufferSize = 64;
}

def Zn4StoreQueue : StoreQueue<Zn4Store>;

//===----------------------------------------------------------------------===//
// Basic helper classes.
//===----------------------------------------------------------------------===//

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
// Emits one anonymous WriteRes for SchedRW on the given ports.
//   ExePorts - resources the write uses.
//   Lat      - value stored in Latency.
//   Res      - value stored in ReleaseAtCycles.
//   UOps     - value stored in NumMicroOps.
multiclass __Zn4WriteRes<SchedWrite SchedRW, list<ProcResourceKind> ExePorts,
                         int Lat = 1, list<int> Res = [], int UOps = 1> {
  def : WriteRes<SchedRW, ExePorts> {
    let NumMicroOps = UOps;
    let Latency = Lat;
    let ReleaseAtCycles = Res;
  }
}

// Emits two write resources for a foldable SchedWrite: one for SchedRW itself
// and one for SchedRW.Folded (the variant with a folded load).
//
// The folded variant
//   * uses [AGU, Zn4Load] followed by ExePorts as its resources,
//   * has latency Lat + LoadLat and UOps + LoadUOps micro-ops,
//   * passes an empty cycle list when Res is empty and LoadRes is 1;
//     otherwise its cycle list is [1, LoadRes] followed by Res, or by a
//     1 for every entry of ExePorts when Res is empty.
multiclass __Zn4WriteResPair<X86FoldableSchedWrite SchedRW,
                             list<ProcResourceKind> ExePorts, int Lat,
                             list<int> Res, int UOps, int LoadLat, int LoadUOps,
                             ProcResourceKind AGU, int LoadRes> {
  // Register form.
  defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;

  // Folded-load form.
  defm : __Zn4WriteRes<
      SchedRW.Folded,
      !listconcat([AGU, Zn4Load], ExePorts),
      !add(Lat, LoadLat),
      !if(!and(!empty(Res), !eq(LoadRes, 1)),
          [],
          !listconcat([1, LoadRes],
                      !if(!empty(Res), !listsplat(1, !size(ExePorts)), Res))),
      !add(UOps, LoadUOps)>;
}

// For classes without folded loads.
// The four wrappers below forward their arguments unchanged to
// __Zn4WriteRes; they differ only in name.

// Integer writes.
multiclass Zn4WriteResInt<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts,
                          int Lat = 1,
                          list<int> Res = [],
                          int UOps = 1> {
  defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

// XMM writes.
multiclass Zn4WriteResXMM<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts,
                          int Lat = 1,
                          list<int> Res = [],
                          int UOps = 1> {
  defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

// YMM writes.
multiclass Zn4WriteResYMM<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts,
                          int Lat = 1,
                          list<int> Res = [],
                          int UOps = 1> {
  defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

// ZMM writes.
multiclass Zn4WriteResZMM<SchedWrite SchedRW,
                          list<ProcResourceKind> ExePorts,
                          int Lat = 1,
                          list<int> Res = [],
                          int UOps = 1> {
  defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
}

// For classes with folded loads.
// Integer pair: the folded form adds the model's LoadLatency and uses the
// Zn4AGU012 group for address generation.
multiclass Zn4WriteResIntPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts,
                              int Lat = 1,
                              list<int> Res = [],
                              int UOps = 1,
                              int LoadUOps = 0,
                              int LoadRes = 1> {
  defm : __Zn4WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver4Model.LoadLatency, LoadUOps,
                           Zn4AGU012, LoadRes>;
}

// XMM pair: the folded form adds the model's VecLoadLatency and uses
// Zn4FPLd01 in the address-generation slot.
multiclass Zn4WriteResXMMPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts,
                              int Lat = 1,
                              list<int> Res = [],
                              int UOps = 1,
                              int LoadUOps = 0,
                              int LoadRes = 1> {
  defm : __Zn4WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver4Model.VecLoadLatency, LoadUOps,
                           Zn4FPLd01, LoadRes>;
}

// YMM pair: same forwarding as the XMM pair.
multiclass Zn4WriteResYMMPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts,
                              int Lat = 1,
                              list<int> Res = [],
                              int UOps = 1,
                              int LoadUOps = 0,
                              int LoadRes = 1> {
  defm : __Zn4WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver4Model.VecLoadLatency, LoadUOps,
                           Zn4FPLd01, LoadRes>;
}

// ZMM pair: same forwarding as the XMM/YMM pairs, but UOps defaults to 2.
multiclass Zn4WriteResZMMPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts,
                              int Lat = 1,
                              list<int> Res = [],
                              int UOps = 2,
                              int LoadUOps = 0,
                              int LoadRes = 1> {
  defm : __Zn4WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
                           Znver4Model.VecLoadLatency, LoadUOps,
                           Zn4FPLd01, LoadRes>;
}

//===----------------------------------------------------------------------===//
// Here be dragons.
//===----------------------------------------------------------------------===//

// Read advance for operands read after an integer load: the integer
// load-to-use latency.
def : ReadAdvance<ReadAfterLd, Znver4Model.LoadLatency>;

// Read advance for operands read after a vector load: the FP load-to-use
// latency.
def : ReadAdvance<ReadAfterVecLd, Znver4Model.VecLoadLatency>;
def : ReadAdvance<ReadAfterVecXLd, Znver4Model.VecLoadLatency>;
def : ReadAdvance<ReadAfterVecYLd, Znver4Model.VecLoadLatency>;

// AMD SOG 19h, 2.11 Floating-Point Unit
//   There is 1 cycle of added latency for a result to cross
//   from F to I or I to F domain.
def : ReadAdvance<ReadInt2Fpu, -1>;

// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
defm : Zn4WriteResInt<WriteRMW, [Zn4AGU012, Zn4Store],
                      Znver4Model.StoreLatency, [1, 1], 0>;

// Loads, stores, and moves, not folded with other operations.
defm : Zn4WriteResInt<WriteLoad, [Zn4AGU012, Zn4Load],
                      !add(Znver4Model.LoadLatency, 1), [1, 1], 1>;

// Model the effect of clobbering the read-write mask operand of the GATHER operation.
// Does not cost anything by itself (no resources, no micro-ops); it only has
// a latency, LoadLatency + 1.
defm : Zn4WriteResInt<WriteVecMaskedGatherWriteback, [],
                      !add(Znver4Model.LoadLatency, 1), [], 0>;

// Loads listed in the InstRW below: LoadLatency + 1 cycles of latency, one
// micro-op, with the AGU group released at cycle 3 and Zn4Load at cycle 1.
def Zn4WriteMOVSlow : SchedWriteRes<[Zn4AGU012, Zn4Load]> {
  let NumMicroOps = 1;
  let Latency = !add(Znver4Model.LoadLatency, 1);
  let ReleaseAtCycles = [3, 1];
}
def : InstRW<[Zn4WriteMOVSlow],
             (instrs MOV8rm, MOV8rm_NOREX, MOV16rm,
                     MOVSX16rm16, MOVSX16rm32, MOVZX16rm16,
                     MOVSX16rm8, MOVZX16rm8)>;

// Plain and non-temporal stores share the same resources and latency.
defm : Zn4WriteResInt<WriteStore, [Zn4AGU012, Zn4Store],
                      Znver4Model.StoreLatency, [1, 2], 1>;
defm : Zn4WriteResInt<WriteStoreNT, [Zn4AGU012, Zn4Store],
                      Znver4Model.StoreLatency, [1, 2], 1>;

// Register moves go to any ALU pipe.
defm : Zn4WriteResInt<WriteMove, [Zn4ALU0123], 1, [4], 1>;

// Treat misc copies as a move.
5221ac55f4cSDimitry Andricdef : InstRW<[WriteMove], (instrs COPY)>; 5231ac55f4cSDimitry Andric 5241ac55f4cSDimitry Andricdef Zn4WriteMOVBE16rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> { 5251ac55f4cSDimitry Andric let Latency = Znver4Model.LoadLatency; 5265f757f3fSDimitry Andric let ReleaseAtCycles = [1, 1, 4]; 5271ac55f4cSDimitry Andric let NumMicroOps = 1; 5281ac55f4cSDimitry Andric} 5291ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteMOVBE16rm], (instrs MOVBE16rm)>; 5301ac55f4cSDimitry Andric 5311ac55f4cSDimitry Andricdef Zn4WriteMOVBEmr : SchedWriteRes<[Zn4ALU0123, Zn4AGU012, Zn4Store]> { 5321ac55f4cSDimitry Andric let Latency = Znver4Model.StoreLatency; 5335f757f3fSDimitry Andric let ReleaseAtCycles = [4, 1, 1]; 5341ac55f4cSDimitry Andric let NumMicroOps = 2; 5351ac55f4cSDimitry Andric} 5361ac55f4cSDimitry Andricdef : InstRW<[Zn4WriteMOVBEmr], (instrs MOVBE16mr, MOVBE32mr, MOVBE64mr)>; 5371ac55f4cSDimitry Andric 5381ac55f4cSDimitry Andric// Arithmetic. 5391ac55f4cSDimitry Andricdefm : Zn4WriteResIntPair<WriteALU, [Zn4ALU0123], 1, [1], 1>; // Simple integer ALU op. 

// Accumulator-with-immediate ALU forms hold the ALU group for 4 cycles.
def Zn4WriteALUSlow : SchedWriteRes<[Zn4ALU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteALUSlow], (instrs ADD8i8, ADD16i16, ADD32i32, ADD64i32,
                                        AND8i8, AND16i16, AND32i32, AND64i32,
                                         OR8i8,  OR16i16,  OR32i32,  OR64i32,
                                        SUB8i8, SUB16i16, SUB32i32, SUB64i32,
                                        XOR8i8, XOR16i16, XOR32i32, XOR64i32)>;

// Register-to-register sign/zero extension into a 16-bit destination.
def Zn4WriteMoveExtend : SchedWriteRes<[Zn4ALU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteMoveExtend], (instrs MOVSX16rr16, MOVSX16rr32, MOVZX16rr16, MOVSX16rr8, MOVZX16rr8)>;

// Materializing a 32-bit immediate into a register.
def Zn4WriteMaterialize32bitImm: SchedWriteRes<[Zn4ALU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteMaterialize32bitImm], (instrs MOV32ri, MOV32ri_alt, MOV64ri32)>;

// Register forms of PDEP/PEXT are bound to ALU1 only.
def Zn4WritePDEP_PEXT : SchedWriteRes<[Zn4ALU1]> {
  let Latency = 3;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WritePDEP_PEXT], (instrs PDEP32rr, PDEP64rr,
                                          PEXT32rr, PEXT64rr)>;

defm : Zn4WriteResIntPair<WriteADC, [Zn4ALU0123], 1, [4], 1>; // Integer ALU + flags op.

// 8-bit ADC/SBB with a memory destination.
def Zn4WriteADC8mr_SBB8mr : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123, Zn4Store]> {
  let Latency = 1;
  let ReleaseAtCycles = [1, 1, 7, 1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteADC8mr_SBB8mr], (instrs ADC8mr, SBB8mr)>;

// This is for simple LEAs with one or two input operands.
defm : Zn4WriteResInt<WriteLEA, [Zn4AGU012], 1, [1], 1>; // LEA instructions can't fold loads.

// This write is used for slow LEA instructions.
def Zn4Write3OpsLEA : SchedWriteRes<[Zn4ALU0123]> {
  let Latency = 2;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 2;
}

// On Znver4, a slow LEA is either a 3Ops LEA (base, index, offset),
// or an LEA with a `Scale` value different than 1.
def Zn4SlowLEAPredicate : MCSchedPredicate<
  CheckAny<[
    // A 3-operand LEA (base, index, offset).
    IsThreeOperandsLEAFn,
    // An LEA with a "Scale" different than 1.
    CheckAll<[
      CheckIsImmOperand<2>,
      CheckNot<CheckImmOperand<2, 1>>
    ]>
  ]>
>;

// Pick the slow write when the predicate matches, otherwise the plain WriteLEA.
def Zn4WriteLEA : SchedWriteVariant<[
    SchedVar<Zn4SlowLEAPredicate, [Zn4Write3OpsLEA]>,
    SchedVar<NoSchedPred,         [WriteLEA]>
]>;

def : InstRW<[Zn4WriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;

// 16-bit LEA always uses the slow write below.
def Zn4SlowLEA16r : SchedWriteRes<[Zn4ALU0123]> {
  let Latency = 2; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [4];
  let NumMicroOps = 2;
}

def : InstRW<[Zn4SlowLEA16r], (instrs LEA16r)>;

// Integer multiplication
defm : Zn4WriteResIntPair<WriteIMul8, [Zn4Multiplier], 3, [3], 1>; // Integer 8-bit multiplication.
defm : Zn4WriteResIntPair<WriteIMul16, [Zn4Multiplier], 3, [3], 3, /*LoadUOps=*/1>; // Integer 16-bit multiplication.
defm : Zn4WriteResIntPair<WriteIMul16Imm, [Zn4Multiplier], 4, [4], 2>; // Integer 16-bit multiplication by immediate.
defm : Zn4WriteResIntPair<WriteIMul16Reg, [Zn4Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
defm : Zn4WriteResIntPair<WriteIMul32, [Zn4Multiplier], 3, [3], 2>; // Integer 32-bit multiplication.
defm : Zn4WriteResIntPair<WriteMULX32, [Zn4Multiplier], 3, [1], 2>; // Integer 32-bit Unsigned Multiply Without Affecting Flags.
defm : Zn4WriteResIntPair<WriteIMul32Imm, [Zn4Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
defm : Zn4WriteResIntPair<WriteIMul32Reg, [Zn4Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
defm : Zn4WriteResIntPair<WriteIMul64, [Zn4Multiplier], 3, [3], 2>; // Integer 64-bit multiplication.
defm : Zn4WriteResIntPair<WriteMULX64, [Zn4Multiplier], 3, [1], 2>; // Integer 64-bit Unsigned Multiply Without Affecting Flags.
defm : Zn4WriteResIntPair<WriteIMul64Imm, [Zn4Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
defm : Zn4WriteResIntPair<WriteIMul64Reg, [Zn4Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.
// The two "high part" writes consume no resources and no micro-ops; they only
// carry latency. The Ld form adds the integer load latency on top.
defm : Zn4WriteResInt<WriteIMulHLd, [], !add(4, Znver4Model.LoadLatency), [], 0>; // Integer multiplication, high part (load latency included).
defm : Zn4WriteResInt<WriteIMulH, [], 4, [], 0>; // Integer multiplication, high part.

defm : Zn4WriteResInt<WriteBSWAP32, [Zn4ALU0123], 1, [1], 1>; // Byte Order (Endianness) 32-bit Swap.
defm : Zn4WriteResInt<WriteBSWAP64, [Zn4ALU0123], 1, [1], 1>; // Byte Order (Endianness) 64-bit Swap.

defm : Zn4WriteResIntPair<WriteCMPXCHG, [Zn4ALU0123], 3, [12], 5>; // Compare and set, compare and swap.

// 8-bit register CMPXCHG: 3 micro-ops instead of the generic 5.
def Zn4WriteCMPXCHG8rr : SchedWriteRes<[Zn4ALU0123]> {
  let Latency = 3;
  let ReleaseAtCycles = [12];
  let NumMicroOps = 3;
}
def : InstRW<[Zn4WriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;

defm : Zn4WriteResInt<WriteCMPXCHGRMW, [Zn4ALU0123], 3, [12], 6>; // Compare and set, compare and swap.

// Memory form of the 8-bit CMPXCHG (plain and LOCK-prefixed): derived from the
// register form plus the integer load latency and 2 extra micro-ops.
def Zn4WriteCMPXCHG8rm_LCMPXCHG8 : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteCMPXCHG8rr.Latency);
  let ReleaseAtCycles = [1, 1, 12];
  let NumMicroOps = !add(Zn4WriteCMPXCHG8rr.NumMicroOps, 2);
}
def : InstRW<[Zn4WriteCMPXCHG8rm_LCMPXCHG8], (instrs CMPXCHG8rm, LCMPXCHG8)>;

def Zn4WriteCMPXCHG8B : SchedWriteRes<[Zn4ALU0123]> {
  let Latency = 3; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [24];
  let NumMicroOps = 19;
}
def : InstRW<[Zn4WriteCMPXCHG8B], (instrs CMPXCHG8B)>;

def Zn4WriteCMPXCHG16B_LCMPXCHG16B : SchedWriteRes<[Zn4ALU0123]> {
  let Latency = 4; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [59];
  let NumMicroOps = 28;
}
def : InstRW<[Zn4WriteCMPXCHG16B_LCMPXCHG16B], (instrs CMPXCHG16B, LCMPXCHG16B)>;

// 8-bit and 16-bit register exchanges.
def Zn4WriteWriteXCHGUnrenameable : SchedWriteRes<[Zn4ALU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteWriteXCHGUnrenameable], (instrs XCHG8rr, XCHG16rr, XCHG16ar)>;

def Zn4WriteXCHG8rm_XCHG16rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
  let Latency = !add(Znver4Model.LoadLatency, 3); // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = 5;
}
def : InstRW<[Zn4WriteXCHG8rm_XCHG16rm], (instrs XCHG8rm, XCHG16rm)>;

def Zn4WriteXCHG32rm_XCHG64rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
  let Latency = !add(Znver4Model.LoadLatency, 2); // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteXCHG32rm_XCHG64rm], (instrs XCHG32rm, XCHG64rm)>;

// Integer division.
// FIXME: uops for 8-bit division measures as 2. For others it's a guess.
// FIXME: latency for 8-bit division measures as 10. For others it's a guess.
// Unsigned and signed division use the same numbers per operand width; the
// divider is held for the full latency of the operation.
defm : Zn4WriteResIntPair<WriteDiv8, [Zn4Divider], 10, [10], 2>;
defm : Zn4WriteResIntPair<WriteDiv16, [Zn4Divider], 11, [11], 2>;
defm : Zn4WriteResIntPair<WriteDiv32, [Zn4Divider], 13, [13], 2>;
defm : Zn4WriteResIntPair<WriteDiv64, [Zn4Divider], 17, [17], 2>;
defm : Zn4WriteResIntPair<WriteIDiv8, [Zn4Divider], 10, [10], 2>;
defm : Zn4WriteResIntPair<WriteIDiv16, [Zn4Divider], 11, [11], 2>;
defm : Zn4WriteResIntPair<WriteIDiv32, [Zn4Divider], 13, [13], 2>;
defm : Zn4WriteResIntPair<WriteIDiv64, [Zn4Divider], 17, [17], 2>;

defm : Zn4WriteResIntPair<WriteBSF, [Zn4ALU1], 1, [1], 6, /*LoadUOps=*/1>; // Bit scan forward.
defm : Zn4WriteResIntPair<WriteBSR, [Zn4ALU1], 1, [1], 6, /*LoadUOps=*/1>; // Bit scan reverse.

defm : Zn4WriteResIntPair<WritePOPCNT, [Zn4ALU0123], 1, [1], 1>; // Bit population count.

// 16-bit register POPCNT holds the ALU group for 4 cycles instead of 1.
def Zn4WritePOPCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WritePOPCNT16rr], (instrs POPCNT16rr)>;

defm : Zn4WriteResIntPair<WriteLZCNT, [Zn4ALU0123], 1, [1], 1>; // Leading zero count.

// 16-bit register LZCNT holds the ALU group for 4 cycles instead of 1.
def Zn4WriteLZCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteLZCNT16rr], (instrs LZCNT16rr)>;

defm : Zn4WriteResIntPair<WriteTZCNT, [Zn4ALU12], 2, [1], 2>; // Trailing zero count.

// NOTE(review): this 16-bit override uses Zn4ALU0123 while the generic
// WriteTZCNT above uses Zn4ALU12 -- confirm the wider resource group is intended.
def Zn4WriteTZCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
  let Latency = 2;
  let ReleaseAtCycles = [4];
  let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteTZCNT16rr], (instrs TZCNT16rr)>;

defm : Zn4WriteResIntPair<WriteCMOV, [Zn4ALU03], 1, [1], 1>; // Conditional move.
defm : Zn4WriteResInt<WriteFCMOV, [Zn4ALU0123], 7, [28], 7>; // FIXME: not from llvm-exegesis // X87 conditional move.
defm : Zn4WriteResInt<WriteSETCC, [Zn4ALU03], 1, [2], 1>; // Set register based on condition code.
defm : Zn4WriteResInt<WriteSETCCStore, [Zn4ALU03, Zn4AGU012, Zn4Store], 2, [2, 1, 1], 2>; // FIXME: latency not from llvm-exegesis
defm : Zn4WriteResInt<WriteLAHFSAHF, [Zn4ALU3], 1, [1], 1>; // Load/Store flags in AH.

defm : Zn4WriteResInt<WriteBitTest, [Zn4ALU12], 1, [1], 1>; // Bit Test
defm : Zn4WriteResInt<WriteBitTestImmLd, [Zn4AGU012, Zn4Load, Zn4ALU12], !add(Znver4Model.LoadLatency, 1), [1, 1, 1], 2>;
defm : Zn4WriteResInt<WriteBitTestRegLd, [Zn4AGU012, Zn4Load, Zn4ALU12], !add(Znver4Model.LoadLatency, 1), [1, 1, 1], 7>;

defm : Zn4WriteResInt<WriteBitTestSet, [Zn4ALU12], 2, [2], 2>; // Bit Test + Set
defm : Zn4WriteResInt<WriteBitTestSetImmLd, [Zn4AGU012, Zn4Load, Zn4ALU12], !add(Znver4Model.LoadLatency, 2), [1, 1, 1], 4>;
defm : Zn4WriteResInt<WriteBitTestSetRegLd, [Zn4AGU012, Zn4Load, Zn4ALU12], !add(Znver4Model.LoadLatency, 2), [1, 1, 1], 9>;

// Integer shifts and rotates.
defm : Zn4WriteResIntPair<WriteShift, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn4WriteResIntPair<WriteShiftCL, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn4WriteResIntPair<WriteRotate, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;

// Rotate through carry by 1, register forms.
def Zn4WriteRotateR1 : SchedWriteRes<[Zn4ALU12]> {
  let Latency = 1;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteRotateR1], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
                                         RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;

// Rotate through carry by 1, memory forms: register form + load latency and
// one extra micro-op.
def Zn4WriteRotateM1 : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateR1.Latency);
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = !add(Zn4WriteRotateR1.NumMicroOps, 1);
}
def : InstRW<[Zn4WriteRotateM1], (instrs RCL8m1, RCL16m1, RCL32m1, RCL64m1,
                                         RCR8m1, RCR16m1, RCR32m1, RCR64m1)>;

// RCR by immediate, register forms.
def Zn4WriteRotateRightRI : SchedWriteRes<[Zn4ALU12]> {
  let Latency = 3;
  let ReleaseAtCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[Zn4WriteRotateRightRI], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;

// RCR by immediate, memory forms.
// NOTE(review): this adds 3 micro-ops to the register form, whereas
// Zn4WriteRotateLeftMI below adds 2 -- confirm the difference is intended.
def Zn4WriteRotateRightMI : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateRightRI.Latency);
  let ReleaseAtCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn4WriteRotateRightRI.NumMicroOps, 3);
}
def : InstRW<[Zn4WriteRotateRightMI], (instrs RCR8mi, RCR16mi, RCR32mi, RCR64mi)>;

// RCL by immediate, register forms.
def Zn4WriteRotateLeftRI : SchedWriteRes<[Zn4ALU12]> {
  let Latency = 4;
  let ReleaseAtCycles = [8];
  let NumMicroOps = 9;
}
def : InstRW<[Zn4WriteRotateLeftRI], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;

// RCL by immediate, memory forms.
def Zn4WriteRotateLeftMI : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateLeftRI.Latency);
  let ReleaseAtCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn4WriteRotateLeftRI.NumMicroOps, 2);
}
def : InstRW<[Zn4WriteRotateLeftMI], (instrs RCL8mi, RCL16mi, RCL32mi, RCL64mi)>;

defm : Zn4WriteResIntPair<WriteRotateCL, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;

// RCR by CL, register forms.
def Zn4WriteRotateRightRCL : SchedWriteRes<[Zn4ALU12]> {
  let Latency = 3;
  let ReleaseAtCycles = [6];
  let NumMicroOps = 7;
}
def : InstRW<[Zn4WriteRotateRightRCL], (instrs RCR8rCL, RCR16rCL, RCR32rCL, RCR64rCL)>;

// RCR by CL, memory forms: register form + load latency and 2 extra micro-ops.
def Zn4WriteRotateRightMCL : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateRightRCL.Latency);
  let ReleaseAtCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn4WriteRotateRightRCL.NumMicroOps, 2);
}
def : InstRW<[Zn4WriteRotateRightMCL], (instrs RCR8mCL, RCR16mCL, RCR32mCL, RCR64mCL)>;

// RCL by CL, register forms.
def Zn4WriteRotateLeftRCL : SchedWriteRes<[Zn4ALU12]> {
  let Latency = 4;
  let ReleaseAtCycles = [8];
  let NumMicroOps = 9;
}
def : InstRW<[Zn4WriteRotateLeftRCL], (instrs RCL8rCL, RCL16rCL, RCL32rCL, RCL64rCL)>;

// RCL by CL, memory forms: register form + load latency and 2 extra micro-ops.
def Zn4WriteRotateLeftMCL : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateLeftRCL.Latency);
  let ReleaseAtCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn4WriteRotateLeftRCL.NumMicroOps, 2);
}
def : InstRW<[Zn4WriteRotateLeftMCL], (instrs RCL8mCL, RCL16mCL, RCL32mCL, RCL64mCL)>;

// Double shift instructions.
defm : Zn4WriteResInt<WriteSHDrri, [Zn4ALU12], 2, [3], 4>;
defm : Zn4WriteResInt<WriteSHDrrcl, [Zn4ALU12], 2, [3], 5>;
defm : Zn4WriteResInt<WriteSHDmri, [Zn4AGU012, Zn4Load, Zn4ALU12], !add(Znver4Model.LoadLatency, 2), [1, 1, 4], 6>;
defm : Zn4WriteResInt<WriteSHDmrcl, [Zn4AGU012, Zn4Load, Zn4ALU12], !add(Znver4Model.LoadLatency, 2), [1, 1, 4], 6>;

// BMI1 BEXTR/BLS, BMI2 BZHI
defm : Zn4WriteResIntPair<WriteBEXTR, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn4WriteResIntPair<WriteBLS, [Zn4ALU0123], 1, [1], 1, /*LoadUOps=*/1>;
defm : Zn4WriteResIntPair<WriteBZHI, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
defm : Zn4WriteResInt<WriteZero, [Zn4ALU0123], 0, [0], 1>;

// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm : Zn4WriteResIntPair<WriteJump, [Zn4BRU01], 1, [1], 1>; // FIXME: not from llvm-exegesis

// Floating point. This covers both scalar and vector operations.
// x87 constant loads are expressed relative to the integer load latency.
defm : Zn4WriteResInt<WriteFLD0, [Zn4FPLd01, Zn4Load, Zn4FP1], !add(Znver4Model.LoadLatency, 4), [1, 1, 1], 1>;
defm : Zn4WriteResInt<WriteFLD1, [Zn4FPLd01, Zn4Load, Zn4FP1], !add(Znver4Model.LoadLatency, 7), [1, 1, 1], 1>;
defm : Zn4WriteResInt<WriteFLDC, [Zn4FPLd01, Zn4Load, Zn4FP1], !add(Znver4Model.LoadLatency, 7), [1, 1, 1], 1>;
// Vector/FP loads are expressed relative to the vector load latency.
defm : Zn4WriteResXMM<WriteFLoad, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResXMM<WriteFLoadX, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResYMM<WriteFLoadY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResXMM<WriteFMaskedLoad, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResYMM<WriteFMaskedLoadY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResXMM<WriteFStore, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;

// Stores of the high 64-bit half (MOVHPD/MOVHPS and their VEX forms).
def Zn4WriteWriteFStoreMMX : SchedWriteRes<[Zn4FPSt, Zn4Store]> {
  let Latency = 2; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteWriteFStoreMMX], (instrs MOVHPDmr, MOVHPSmr,
                                               VMOVHPDmr, VMOVHPSmr)>;

defm : Zn4WriteResXMM<WriteFStoreX, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
defm : Zn4WriteResYMM<WriteFStoreY, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
defm : Zn4WriteResXMM<WriteFStoreNT, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
defm : Zn4WriteResXMM<WriteFStoreNTX, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
defm : Zn4WriteResYMM<WriteFStoreNTY, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;

// Masked FP stores: many micro-ops and a long hold on the FP store pipe.
defm : Zn4WriteResXMM<WriteFMaskedStore32, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [6, 1], 18>;
defm : Zn4WriteResXMM<WriteFMaskedStore64, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [4, 1], 10>;
defm : Zn4WriteResYMM<WriteFMaskedStore32Y, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [12, 1], 42>;
defm : Zn4WriteResYMM<WriteFMaskedStore64Y, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [6, 1], 18>;

defm : Zn4WriteResXMMPair<WriteFAdd, [Zn4FPFAdd01], 3, [1], 1>; // Floating point add/sub.

// x87 add/sub/mul with a 16/32-bit integer memory operand.
def Zn4WriteX87Arith : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
  let Latency = !add(Znver4Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1, 24];
  let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteX87Arith], (instrs ADD_FI16m, ADD_FI32m,
                                         SUB_FI16m, SUB_FI32m,
                                         SUBR_FI16m, SUBR_FI32m,
                                         MUL_FI16m, MUL_FI32m)>;

// x87 divide with a 16/32-bit integer memory operand.
def Zn4WriteX87Div : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
  let Latency = !add(Znver4Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1, 1, 62];
  let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteX87Div], (instrs DIV_FI16m, DIV_FI32m,
                                       DIVR_FI16m, DIVR_FI32m)>;

defm : Zn4WriteResXMMPair<WriteFAddX, [Zn4FPFAdd01], 3, [1], 1>; // Floating point add/sub (XMM).
defm : Zn4WriteResYMMPair<WriteFAddY, [Zn4FPFAdd01], 3, [1], 1>; // Floating point add/sub (YMM).
defm : Zn4WriteResZMMPair<WriteFAddZ, [Zn4FPFAdd01], 3, [2], 1>; // Floating point add/sub (ZMM).
defm : Zn4WriteResXMMPair<WriteFAdd64, [Zn4FPFAdd01], 3, [1], 1>; // Floating point double add/sub.
defm : Zn4WriteResXMMPair<WriteFAdd64X, [Zn4FPFAdd01], 3, [1], 1>; // Floating point double add/sub (XMM).
defm : Zn4WriteResYMMPair<WriteFAdd64Y, [Zn4FPFAdd01], 3, [1], 1>; // Floating point double add/sub (YMM).
defm : Zn4WriteResZMMPair<WriteFAdd64Z, [Zn4FPFAdd01], 3, [2], 1>; // Floating point double add/sub (ZMM).
defm : Zn4WriteResXMMPair<WriteFCmp, [Zn4FPFMul01], 2, [2], 1>; // Floating point compare.
defm : Zn4WriteResXMMPair<WriteFCmpX, [Zn4FPFMul01], 2, [1], 1>; // Floating point compare (XMM).
defm : Zn4WriteResYMMPair<WriteFCmpY, [Zn4FPFMul01], 2, [1], 1>; // Floating point compare (YMM).
defm : Zn4WriteResZMMPair<WriteFCmpZ, [Zn4FPFMul01], 2, [2], 1>; // Floating point compare (ZMM).
// NOTE(review): WriteFCmp64 has latency 1 while WriteFCmp and the
// WriteFCmp64X/Y/Z variants all use 2 -- confirm this is intended.
defm : Zn4WriteResXMMPair<WriteFCmp64, [Zn4FPFMul01], 1, [1], 1>; // Floating point double compare.
defm : Zn4WriteResXMMPair<WriteFCmp64X, [Zn4FPFMul01], 2, [1], 1>; // Floating point double compare (XMM).
defm : Zn4WriteResYMMPair<WriteFCmp64Y, [Zn4FPFMul01], 2, [1], 1>; // Floating point double compare (YMM).
defm : Zn4WriteResZMMPair<WriteFCmp64Z, [Zn4FPFMul01], 2, [2], 1>; // Floating point double compare (ZMM).
defm : Zn4WriteResXMMPair<WriteFCom, [Zn4FPFMul01], 3, [2], 1>; // FIXME: latency not from llvm-exegesis // Floating point compare to flags (X87).
defm : Zn4WriteResXMMPair<WriteFComX, [Zn4FPFMul01], 4, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point compare to flags (SSE).
defm : Zn4WriteResXMMPair<WriteFMul, [Zn4FPFMul01], 3, [1], 1>; // Floating point multiplication.
defm : Zn4WriteResXMMPair<WriteFMulX, [Zn4FPFMul01], 3, [1], 1>; // Floating point multiplication (XMM).
defm : Zn4WriteResYMMPair<WriteFMulY, [Zn4FPFMul01], 3, [1], 1>; // Floating point multiplication (YMM).
defm : Zn4WriteResZMMPair<WriteFMulZ, [Zn4FPFMul01], 3, [2], 1>; // Floating point multiplication (ZMM).
defm : Zn4WriteResXMMPair<WriteFMul64, [Zn4FPFMul01], 3, [1], 1>; // Floating point double multiplication.
defm : Zn4WriteResXMMPair<WriteFMul64X, [Zn4FPFMul01], 3, [1], 1>; // Floating point double multiplication (XMM).
defm : Zn4WriteResYMMPair<WriteFMul64Y, [Zn4FPFMul01], 3, [1], 1>; // Floating point double multiplication (YMM).
defm : Zn4WriteResZMMPair<WriteFMul64Z, [Zn4FPFMul01], 3, [2], 1>; // Floating point double multiplication (ZMM).
defm : Zn4WriteResXMMPair<WriteFDiv, [Zn4FPFDiv], 11, [3], 1>; // Floating point division.
defm : Zn4WriteResXMMPair<WriteFDivX, [Zn4FPFDiv], 11, [3], 1>; // Floating point division (XMM).
defm : Zn4WriteResYMMPair<WriteFDivY, [Zn4FPFDiv], 11, [3], 1>; // Floating point division (YMM).
defm : Zn4WriteResZMMPair<WriteFDivZ, [Zn4FPFDiv], 11, [6], 1>; // Floating point division (ZMM).
defm : Zn4WriteResXMMPair<WriteFDiv64, [Zn4FPFDiv], 13, [5], 1>; // Floating point double division.
defm : Zn4WriteResXMMPair<WriteFDiv64X, [Zn4FPFDiv], 13, [5], 1>; // Floating point double division (XMM).
defm : Zn4WriteResYMMPair<WriteFDiv64Y, [Zn4FPFDiv], 13, [5], 1>; // Floating point double division (YMM).
defm : Zn4WriteResZMMPair<WriteFDiv64Z, [Zn4FPFDiv], 13, [10], 1>; // Floating point double division (ZMM).
defm : Zn4WriteResXMMPair<WriteFSqrt, [Zn4FPFDiv], 15, [5], 1>; // Floating point square root.
defm : Zn4WriteResXMMPair<WriteFSqrtX, [Zn4FPFDiv], 15, [5], 1>; // Floating point square root (XMM).
9331ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFSqrtY, [Zn4FPFDiv], 15, [5], 1>; // Floating point square root (YMM). 9341ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFSqrtZ, [Zn4FPFDiv], 15, [10], 1>; // Floating point square root (ZMM). 9351ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFSqrt64, [Zn4FPFDiv], 21, [9], 1>; // Floating point double square root. 9361ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFSqrt64X, [Zn4FPFDiv], 21, [9], 1>; // Floating point double square root (XMM). 9371ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFSqrt64Y, [Zn4FPFDiv], 21, [9], 1>; // Floating point double square root (YMM). 9381ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFSqrt64Z, [Zn4FPFDiv], 21, [18], 1>; // Floating point double square root (ZMM). 9391ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFSqrt80, [Zn4FPFDiv], 22, [23], 1>; // FIXME: latency not from llvm-exegesis // Floating point long double square root. 9401ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFRcp, [Zn4FPFMul01], 4, [1], 1>; // Floating point reciprocal estimate. 9411ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFRcpX, [Zn4FPFMul01], 4, [1], 1>; // Floating point reciprocal estimate (XMM). 9421ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFRcpY, [Zn4FPFMul01], 5, [1], 1>; // Floating point reciprocal estimate (YMM). 9431ac55f4cSDimitry Andricdefm : Zn4WriteResZMMPair<WriteFRcpZ, [Zn4FPFMul01], 5, [2], 1>; // Floating point reciprocal estimate (ZMM). 9441ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFRsqrt, [Zn4FPFDiv], 4, [1], 1>; // Floating point reciprocal square root estimate. 9451ac55f4cSDimitry Andricdefm : Zn4WriteResXMMPair<WriteFRsqrtX, [Zn4FPFDiv], 4, [1], 1>; // Floating point reciprocal square root estimate (XMM). 9461ac55f4cSDimitry Andricdefm : Zn4WriteResYMMPair<WriteFRsqrtY, [Zn4FPFDiv], 4, [1], 1>; // Floating point reciprocal square root estimate (YMM). 
defm : Zn4WriteResZMMPair<WriteFRsqrtZ, [Zn4FPFDiv], 5, [2], 1>; // Floating point reciprocal square root estimate (ZMM).

// Fused Multiply Add.
defm : Zn4WriteResXMMPair<WriteFMA, [Zn4FPFMul01], 4, [2], 1>;
defm : Zn4WriteResXMMPair<WriteFMAX, [Zn4FPFMul01], 4, [1], 1>; // XMM
defm : Zn4WriteResYMMPair<WriteFMAY, [Zn4FPFMul01], 4, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteFMAZ, [Zn4FPFMul01], 4, [2], 1>; // ZMM

// Floating point dot products.
defm : Zn4WriteResXMMPair<WriteDPPD, [Zn4FPFMul01], 7, [6], 3, /*LoadUOps=*/2>; // Double dot product.
defm : Zn4WriteResXMMPair<WriteDPPS, [Zn4FPFMul01], 11, [8], 8, /*LoadUOps=*/2>; // Single dot product.
defm : Zn4WriteResYMMPair<WriteDPPSY, [Zn4FPFMul01], 11, [8], 7, /*LoadUOps=*/1>; // Single dot product (YMM).

// Floating point fabs/fchs.
// FIXME: latency not from llvm-exegesis
defm : Zn4WriteResXMMPair<WriteFSign, [Zn4FPFMul01], 1, [2], 1>;

// Floating point rounding.
defm : Zn4WriteResXMMPair<WriteFRnd, [Zn4FPFCvt01], 3, [1], 1>;
defm : Zn4WriteResYMMPair<WriteFRndY, [Zn4FPFCvt01], 3, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteFRndZ, [Zn4FPFCvt01], 3, [2], 1>; // ZMM

// Floating point and/or/xor logicals.
defm : Zn4WriteResXMMPair<WriteFLogic, [Zn4FPVMisc0123], 1, [1], 1>;
defm : Zn4WriteResYMMPair<WriteFLogicY, [Zn4FPVMisc0123], 1, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteFLogicZ, [Zn4FPVMisc0123], 1, [2], 1>; // Floating point and/or/xor logicals (ZMM).

// Floating point TEST instructions.
// FIXME: latency not from llvm-exegesis (applies to all three entries)
defm : Zn4WriteResXMMPair<WriteFTest, [Zn4FPFMisc12], 1, [2], 2>;
defm : Zn4WriteResYMMPair<WriteFTestY, [Zn4FPFMisc12], 1, [2], 2>; // YMM
defm : Zn4WriteResZMMPair<WriteFTestZ, [Zn4FPFMisc12], 1, [4], 1>; // ZMM

// Floating point vector shuffles.
defm : Zn4WriteResXMMPair<WriteFShuffle, [Zn4FPVShuf01], 1, [1], 1>;
defm : Zn4WriteResYMMPair<WriteFShuffleY, [Zn4FPVShuf01], 1, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteFShuffleZ, [Zn4FPVShuf01], 1, [2], 1>; // ZMM

// Floating point vector variable shuffles.
defm : Zn4WriteResXMMPair<WriteFVarShuffle, [Zn4FPVShuf01], 3, [1], 1>;
defm : Zn4WriteResYMMPair<WriteFVarShuffleY, [Zn4FPVShuf01], 3, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteFVarShuffleZ, [Zn4FPVShuf01], 3, [2], 1>; // ZMM

// Floating point vector blends.
defm : Zn4WriteResXMMPair<WriteFBlend, [Zn4FPFMul01], 1, [1], 1>;
defm : Zn4WriteResYMMPair<WriteFBlendY, [Zn4FPFMul01], 1, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteFBlendZ, [Zn4FPFMul01], 1, [2], 1>; // ZMM
// Fp vector variable blends.
defm : Zn4WriteResXMMPair<WriteFVarBlend, [Zn4FPFMul01], 1, [1], 1>;
defm : Zn4WriteResYMMPair<WriteFVarBlendY, [Zn4FPFMul01], 1, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteFVarBlendZ, [Zn4FPFMul01], 1, [2], 1>; // ZMM

// Horizontal Add/Sub (float and integer).
// Floating point variants.
defm : Zn4WriteResXMMPair<WriteFHAdd, [Zn4FPFAdd0], 4, [2], 3>;
defm : Zn4WriteResYMMPair<WriteFHAddY, [Zn4FPFAdd0], 4, [2], 3, /*LoadUOps=*/1>;
defm : Zn4WriteResZMMPair<WriteFHAddZ, [Zn4FPFAdd0], 6, [4], 3, /*LoadUOps=*/1>;
// Integer variants.
defm : Zn4WriteResXMMPair<WritePHAdd, [Zn4FPVAdd0], 2, [2], 3, /*LoadUOps=*/1>;
defm : Zn4WriteResXMMPair<WritePHAddX, [Zn4FPVAdd0], 2, [2], 3>;
defm : Zn4WriteResYMMPair<WritePHAddY, [Zn4FPVAdd0], 3, [3], 3, /*LoadUOps=*/1>;
defm : Zn4WriteResZMMPair<WritePHAddZ, [Zn4FPVAdd0], 2, [4], 3, /*LoadUOps=*/1>;

// Vector integer operations.
// Vector loads: every variant is modeled as the model's VecLoadLatency plus
// one cycle, occupying the FP load pipe and the load unit for one cycle each.
defm : Zn4WriteResXMM<WriteVecLoad, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResXMM<WriteVecLoadX, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResYMM<WriteVecLoadY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResXMM<WriteVecLoadNT, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResYMM<WriteVecLoadNTY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResXMM<WriteVecMaskedLoad, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
defm : Zn4WriteResYMM<WriteVecMaskedLoadY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;

// Vector stores (XMM forms; the remaining store classes follow the
// 128-bit extract/insert overrides below).
defm : Zn4WriteResXMM<WriteVecStore, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
defm : Zn4WriteResXMM<WriteVecStoreX, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;

// Register-to-register 128-bit extract.
def Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn4FPFMisc0]> {
  let Latency = 4;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr],
             (instrs VEXTRACTF128rr, VEXTRACTI128rr)>;

// 128-bit extract to memory: derived from the register form, with one
// extra micro-op and the store resources added.
// NOTE(review): the latency adds Znver4Model.LoadLatency although this is a
// store form -- confirm whether StoreLatency was intended.
def Zn4WriteVEXTRACTI128mr : SchedWriteRes<[Zn4FPFMisc0, Zn4FPSt, Zn4Store]> {
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
  let ReleaseAtCycles = [1, 1, 1];
  let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
}
def : InstRW<[Zn4WriteVEXTRACTI128mr],
             (instrs VEXTRACTI128mr, VEXTRACTF128mr)>;

// 128-bit insert from memory: load latency on top of the register-form
// extract latency, with the same micro-op count as the register form.
def Zn4WriteVINSERTF128rmr : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPFMisc0]> {
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
  let ReleaseAtCycles = [1, 1, 1];
  let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
}
def : InstRW<[Zn4WriteVINSERTF128rmr], (instrs VINSERTF128rm)>;

// Remaining vector stores, including masked stores.
defm : Zn4WriteResYMM<WriteVecStoreY, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
defm : Zn4WriteResXMM<WriteVecStoreNT, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
defm : Zn4WriteResYMM<WriteVecStoreNTY, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
defm : Zn4WriteResXMM<WriteVecMaskedStore32, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [6, 1], 18>;
defm : Zn4WriteResXMM<WriteVecMaskedStore64, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [4, 1], 10>;
defm : Zn4WriteResYMM<WriteVecMaskedStore32Y, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [12, 1], 42>;
defm : Zn4WriteResYMM<WriteVecMaskedStore64Y, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [6, 1], 18>;

// Moves between vector registers and GPRs.
defm : Zn4WriteResXMM<WriteVecMoveToGpr, [Zn4FPLd01], 1, [2], 1>;
defm : Zn4WriteResXMM<WriteVecMoveFromGpr, [Zn4FPLd01], 1, [2], 1>;

// MMX register moves.
def Zn4WriteMOVMMX : SchedWriteRes<[Zn4FPLd01, Zn4FPFMisc0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [1, 2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteMOVMMX], (instrs MMX_MOVQ2FR64rr, MMX_MOVQ2DQrr)>;

// Same resources as Zn4WriteMOVMMX, but the second resource is held for
// four cycles instead of two.
def Zn4WriteMOVMMXSlow : SchedWriteRes<[Zn4FPLd01, Zn4FPFMisc0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [1, 4];
  let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteMOVMMXSlow], (instrs MMX_MOVD64rr, MMX_MOVD64to64rr)>;

// Vector integer ALU op, no logicals.
defm : Zn4WriteResXMMPair<WriteVecALU, [Zn4FPVAdd0123], 1, [1], 1>;

// EXTRQ/INSERTQ, register-operand forms.
def Zn4WriteEXTRQ_INSERTQ : SchedWriteRes<[Zn4FPVShuf01, Zn4FPLd01]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteEXTRQ_INSERTQ], (instrs EXTRQ, INSERTQ)>;

// EXTRQI/INSERTQI: same latency and resources as above, two micro-ops.
def Zn4WriteEXTRQI_INSERTQI : SchedWriteRes<[Zn4FPVShuf01, Zn4FPLd01]> {
  let Latency = 3;
  let ReleaseAtCycles = [1, 1];
  let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteEXTRQI_INSERTQI], (instrs EXTRQI, INSERTQI)>;

// Vector integer ALU op, no logicals (XMM).
defm : Zn4WriteResXMMPair<WriteVecALUX, [Zn4FPVAdd0123], 1, [1], 1>;

// XMM integer ALU instructions overridden to a two-cycle latency on
// Zn4FPVAdd01.
def Zn4WriteVecALUXSlow : SchedWriteRes<[Zn4FPVAdd01]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVecALUXSlow],
             (instrs PABSBrr, PABSDrr, PABSWrr,
                     PADDSBrr, PADDSWrr, PADDUSBrr, PADDUSWrr,
                     PAVGBrr, PAVGWrr,
                     PSIGNBrr, PSIGNDrr, PSIGNWrr,
                     VPABSBrr, VPABSDrr, VPABSWrr,
                     VPADDSBrr, VPADDSWrr, VPADDUSBrr, VPADDUSWrr,
                     VPAVGBrr, VPAVGWrr,
                     VPCMPEQQrr,
                     VPSIGNBrr, VPSIGNDrr, VPSIGNWrr,
                     PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
                     VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr)>;

// Opmask (k-register) instructions issued on Zn4FPOpMask01.
def Zn4WriteVecOpMask : SchedWriteRes<[Zn4FPOpMask01]> {
  let Latency = 1;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVecOpMask],
             (instrs KADDBrr, KADDDrr, KADDQrr, KADDWrr,
                     KANDBrr, KANDDrr, KANDQrr, KANDWrr,
                     KANDNBrr, KANDNDrr, KANDNQrr, KANDNWrr,
                     KMOVBkk, KMOVDkk, KMOVQkk, KMOVWkk,
                     KMOVBrk, KMOVDrk, KMOVQrk, KMOVWrk,
                     KNOTBrr, KNOTDrr, KNOTQrr, KNOTWrr,
                     KORBrr, KORDrr, KORQrr, KORWrr,
                     KORTESTBrr, KORTESTDrr, KORTESTQrr, KORTESTWrr,
                     KTESTBrr, KTESTDrr, KTESTQrr, KTESTWrr,
                     KUNPCKBWrr, KUNPCKDQrr, KUNPCKWDrr,
                     KXNORBrr, KXNORDrr, KXNORQrr, KXNORWrr,
                     KXORBrr, KXORDrr, KXORQrr, KXORWrr)>;

// KMOV*mk forms, issued on Zn4FPOpMask4.
def Zn4WriteVecOpMaskMemMov : SchedWriteRes<[Zn4FPOpMask4]> {
  let Latency = 1;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVecOpMaskMemMov],
             (instrs KMOVBmk, KMOVDmk, KMOVQmk, KMOVWmk)>;

// KMOV*kr forms, issued on Zn4FPOpMask4.
def Zn4WriteVecOpMaskKRMov : SchedWriteRes<[Zn4FPOpMask4]> {
  let Latency = 1;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVecOpMaskKRMov],
             (instrs KMOVBkr, KMOVDkr, KMOVQkr, KMOVWkr)>;

// VALIGND/VALIGNQ register-immediate forms.
def Zn4WriteVecALU2Slow : SchedWriteRes<[Zn4FPVAdd12]> {
  // TODO: All align instructions are expected to be of 4 cycle latency
  let Latency = 4;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVecALU2Slow],
             (instrs VALIGNDZrri, VALIGNDZ128rri, VALIGNDZ256rri,
                     VALIGNQZrri, VALIGNQZ128rri, VALIGNQZ256rri)>;

// Vector integer ALU op, no logicals (YMM).
defm : Zn4WriteResYMMPair<WriteVecALUY, [Zn4FPVAdd0123], 1, [1], 1>;

// YMM integer ALU instructions restricted to Zn4FPVAdd01.
def Zn4WriteVecALUYSlow : SchedWriteRes<[Zn4FPVAdd01]> {
  let Latency = 1;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVecALUYSlow],
             (instrs VPABSBYrr, VPABSDYrr, VPABSWYrr,
                     VPADDSBYrr, VPADDSWYrr, VPADDUSBYrr, VPADDUSWYrr,
                     VPSUBSBYrr, VPSUBSWYrr, VPSUBUSBYrr, VPSUBUSWYrr,
                     VPAVGBYrr, VPAVGWYrr,
                     VPCMPEQQYrr,
                     VPSIGNBYrr, VPSIGNDYrr, VPSIGNWYrr)>;

// Vector integer ALU op, no logicals (ZMM).
defm : Zn4WriteResZMMPair<WriteVecALUZ, [Zn4FPVAdd0123], 1, [2], 1>;

// Vector integer and/or/xor logicals.
defm : Zn4WriteResXMMPair<WriteVecLogic, [Zn4FPVMisc0123], 1, [1], 1>;
defm : Zn4WriteResXMMPair<WriteVecLogicX, [Zn4FPVMisc0123], 1, [1], 1>; // XMM
defm : Zn4WriteResYMMPair<WriteVecLogicY, [Zn4FPVMisc0123], 1, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteVecLogicZ, [Zn4FPVMisc0123], 1, [2], 1>; // ZMM

// Vector integer TEST instructions.
// FIXME: latency not from llvm-exegesis (applies to both entries)
defm : Zn4WriteResXMMPair<WriteVecTest, [Zn4FPVAdd12, Zn4FPSt], 1, [1, 1], 2>;
defm : Zn4WriteResYMMPair<WriteVecTestY, [Zn4FPVAdd12, Zn4FPSt], 1, [1, 1], 2>; // YMM
// Vector integer TEST instructions (ZMM).
// FIXME: latency not from llvm-exegesis
defm : Zn4WriteResZMMPair<WriteVecTestZ, [Zn4FPVAdd12, Zn4FPSt], 1, [2, 2], 2>;

// Vector integer shifts.
defm : Zn4WriteResXMMPair<WriteVecShift, [Zn4FPVShift01], 1, [1], 1>; // default
defm : Zn4WriteResXMMPair<WriteVecShiftX, [Zn4FPVShift01], 2, [2], 1>; // XMM
defm : Zn4WriteResYMMPair<WriteVecShiftY, [Zn4FPVShift01], 1, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteVecShiftZ, [Zn4FPVShift01], 1, [2], 1>; // ZMM

// Vector integer immediate shifts.
defm : Zn4WriteResXMMPair<WriteVecShiftImm, [Zn4FPVShift01], 1, [1], 1>; // default
defm : Zn4WriteResXMMPair<WriteVecShiftImmX, [Zn4FPVShift01], 1, [1], 1>; // XMM
defm : Zn4WriteResYMMPair<WriteVecShiftImmY, [Zn4FPVShift01], 1, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteVecShiftImmZ, [Zn4FPVShift01], 1, [2], 1>; // ZMM

// Vector integer multiply.
defm : Zn4WriteResXMMPair<WriteVecIMul, [Zn4FPVMul01], 3, [1], 1>; // default
defm : Zn4WriteResXMMPair<WriteVecIMulX, [Zn4FPVMul01], 3, [1], 1>; // XMM
defm : Zn4WriteResYMMPair<WriteVecIMulY, [Zn4FPVMul01], 3, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteVecIMulZ, [Zn4FPVMul01], 3, [2], 1>; // ZMM

// Vector PMULLD.
defm : Zn4WriteResXMMPair<WritePMULLD, [Zn4FPVMul01], 3, [1], 1>;
defm : Zn4WriteResYMMPair<WritePMULLDY, [Zn4FPVMul01], 3, [1], 1>; // Vector PMULLD (YMM).
defm : Zn4WriteResZMMPair<WritePMULLDZ, [Zn4FPVMul01], 3, [2], 1>; // Vector PMULLD (ZMM).

// Vector shuffles.
defm : Zn4WriteResXMMPair<WriteShuffle, [Zn4FPVShuf01], 1, [1], 1>;
defm : Zn4WriteResXMMPair<WriteShuffleX, [Zn4FPVShuf01], 1, [1], 1>; // XMM
defm : Zn4WriteResYMMPair<WriteShuffleY, [Zn4FPVShuf01], 1, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteShuffleZ, [Zn4FPVShuf01], 1, [2], 1>; // ZMM

// Vector variable shuffles.
defm : Zn4WriteResXMMPair<WriteVarShuffle, [Zn4FPVShuf01], 1, [1], 1>;
defm : Zn4WriteResXMMPair<WriteVarShuffleX, [Zn4FPVShuf01], 1, [1], 1>; // XMM
defm : Zn4WriteResYMMPair<WriteVarShuffleY, [Zn4FPVShuf01], 1, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteVarShuffleZ, [Zn4FPVShuf01], 1, [2], 1>; // ZMM

// Vector blends.
defm : Zn4WriteResXMMPair<WriteBlend, [Zn4FPVMisc0123], 1, [1], 1>;
defm : Zn4WriteResYMMPair<WriteBlendY, [Zn4FPVMisc0123], 1, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteBlendZ, [Zn4FPVMisc0123], 1, [2], 1>; // ZMM

// Vector variable blends.
defm : Zn4WriteResXMMPair<WriteVarBlend, [Zn4FPVMul01], 1, [1], 1>;
defm : Zn4WriteResYMMPair<WriteVarBlendY, [Zn4FPVMul01], 1, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteVarBlendZ, [Zn4FPVMul01], 1, [2], 1>; // ZMM
// Vector PSADBW.
defm : Zn4WriteResXMMPair<WritePSADBW, [Zn4FPVAdd0123], 3, [2], 1>;
defm : Zn4WriteResXMMPair<WritePSADBWX, [Zn4FPVAdd0123], 3, [2], 1>; // XMM
defm : Zn4WriteResYMMPair<WritePSADBWY, [Zn4FPVAdd0123], 3, [2], 1>; // YMM
defm : Zn4WriteResZMMPair<WritePSADBWZ, [Zn4FPVAdd0123], 4, [4], 1>; // ZMM

// Vector MPSAD.
defm : Zn4WriteResXMMPair<WriteMPSAD, [Zn4FPVAdd0123], 4, [8], 4, /*LoadUOps=*/2>;
defm : Zn4WriteResYMMPair<WriteMPSADY, [Zn4FPVAdd0123], 4, [8], 3, /*LoadUOps=*/1>; // YMM
defm : Zn4WriteResZMMPair<WriteMPSADZ, [Zn4FPVAdd0123], 4, [16], 3, /*LoadUOps=*/1>; // ZMM

// Vector PHMINPOS.
defm : Zn4WriteResXMMPair<WritePHMINPOS, [Zn4FPVAdd01], 3, [1], 1>;

// Vector insert/extract operations.
// Insert gpr to vector element.
defm : Zn4WriteResXMMPair<WriteVecInsert, [Zn4FPLd01], 1, [2], 2, /*LoadUOps=*/-1>;
// Extract vector element to gpr.
defm : Zn4WriteResXMM<WriteVecExtract, [Zn4FPLd01], 1, [2], 2>;
// Extract vector element and store.
defm : Zn4WriteResXMM<WriteVecExtractSt, [Zn4FPSt, Zn4Store], !add(1, Znver4Model.StoreLatency), [1, 1], 2>;

// MOVMSK operations.
defm : Zn4WriteResXMM<WriteFMOVMSK, [Zn4FPVMisc2], 1, [1], 1>;
defm : Zn4WriteResXMM<WriteVecMOVMSK, [Zn4FPVMisc2], 1, [1], 1>;
defm : Zn4WriteResYMM<WriteVecMOVMSKY, [Zn4FPVMisc2], 1, [1], 1>;
defm : Zn4WriteResXMM<WriteMMXMOVMSK, [Zn4FPVMisc2], 1, [1], 1>;

// Conversion between integer and float.

// Double -> Integer.
defm : Zn4WriteResXMMPair<WriteCvtSD2I, [Zn4FPFCvt01], 1, [1], 1>;
defm : Zn4WriteResXMMPair<WriteCvtPD2I, [Zn4FPFCvt01], 3, [2], 1>; // XMM
defm : Zn4WriteResYMMPair<WriteCvtPD2IY, [Zn4FPFCvt01], 3, [2], 2>; // YMM
defm : Zn4WriteResZMMPair<WriteCvtPD2IZ, [Zn4FPFCvt01], 3, [4], 2>; // ZMM

// Standalone write resource; no InstRW binding appears next to it here.
def Zn4WriteCvtPD2IMMX : SchedWriteRes<[Zn4FPFCvt01]> {
  let Latency = 1;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}

// Float -> Integer.
defm : Zn4WriteResXMMPair<WriteCvtSS2I, [Zn4FPFCvt01], 5, [5], 2>;
defm : Zn4WriteResXMMPair<WriteCvtPS2I, [Zn4FPFCvt01], 3, [1], 1>; // XMM
defm : Zn4WriteResYMMPair<WriteCvtPS2IY, [Zn4FPFCvt01], 4, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteCvtPS2IZ, [Zn4FPFCvt01], 4, [2], 2>; // ZMM

// Integer -> Double.
defm : Zn4WriteResXMMPair<WriteCvtI2SD, [Zn4FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>;
defm : Zn4WriteResXMMPair<WriteCvtI2PD, [Zn4FPFCvt01], 3, [1], 1>; // Integer -> Double (XMM).
defm : Zn4WriteResYMMPair<WriteCvtI2PDY, [Zn4FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>; // Integer -> Double (YMM).
defm : Zn4WriteResZMMPair<WriteCvtI2PDZ, [Zn4FPFCvt01], 4, [4], 4, /*LoadUOps=*/-1>; // Integer -> Double (ZMM).

// Standalone write resource; no InstRW binding appears next to it here.
def Zn4WriteCvtI2PDMMX : SchedWriteRes<[Zn4FPFCvt01]> {
  let Latency = 2;
  let ReleaseAtCycles = [6];
  let NumMicroOps = 2;
}

// Integer -> Float.
defm : Zn4WriteResXMMPair<WriteCvtI2SS, [Zn4FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>;
defm : Zn4WriteResXMMPair<WriteCvtI2PS, [Zn4FPFCvt01], 3, [1], 1>; // XMM
defm : Zn4WriteResYMMPair<WriteCvtI2PSY, [Zn4FPFCvt01], 3, [1], 1>; // YMM
defm : Zn4WriteResZMMPair<WriteCvtI2PSZ, [Zn4FPFCvt01], 3, [2], 2>; // ZMM

// Standalone write resource; no InstRW binding appears next to it here.
def Zn4WriteCvtI2PSMMX : SchedWriteRes<[Zn4FPFCvt01]> {
  let Latency = 3;
  let ReleaseAtCycles = [1];
  let NumMicroOps = 2;
}

// Float -> Double size conversion.
defm : Zn4WriteResXMMPair<WriteCvtSS2SD, [Zn4FPFCvt01], 3, [1], 1>;
defm : Zn4WriteResXMMPair<WriteCvtPS2PD, [Zn4FPFCvt01], 3, [1], 1>; // XMM
defm : Zn4WriteResYMMPair<WriteCvtPS2PDY, [Zn4FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // YMM
defm : Zn4WriteResZMMPair<WriteCvtPS2PDZ, [Zn4FPFCvt01], 6, [4], 4, /*LoadUOps=*/-1>; // Float -> Double size conversion (ZMM).

// Double -> Float size conversion.
defm : Zn4WriteResXMMPair<WriteCvtSD2SS, [Zn4FPFCvt01], 3, [1], 1>;
defm : Zn4WriteResXMMPair<WriteCvtPD2PS, [Zn4FPFCvt01], 3, [1], 1>; // XMM
defm : Zn4WriteResYMMPair<WriteCvtPD2PSY, [Zn4FPFCvt01], 6, [2], 2>; // YMM
defm : Zn4WriteResZMMPair<WriteCvtPD2PSZ, [Zn4FPFCvt01], 6, [4], 4>; // ZMM

// Half -> Float size conversion.
defm : Zn4WriteResXMMPair<WriteCvtPH2PS, [Zn4FPFCvt01], 3, [1], 1>;
defm : Zn4WriteResYMMPair<WriteCvtPH2PSY, [Zn4FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // YMM
defm : Zn4WriteResZMMPair<WriteCvtPH2PSZ, [Zn4FPFCvt01], 4, [4], 4, /*LoadUOps=*/-1>; // ZMM

// Float -> Half size conversion.
defm : Zn4WriteResXMM<WriteCvtPS2PH, [Zn4FPFCvt01], 3, [2], 1>;
defm : Zn4WriteResYMM<WriteCvtPS2PHY, [Zn4FPFCvt01], 6, [2], 2>; // YMM
defm : Zn4WriteResZMM<WriteCvtPS2PHZ, [Zn4FPFCvt01], 6, [2], 2>; // ZMM

// Float -> Half + store size conversion.
defm : Zn4WriteResXMM<WriteCvtPS2PHSt, [Zn4FPFCvt01, Zn4FPSt, Zn4Store], !add(3, Znver4Model.StoreLatency), [1, 1, 1], 2>;
// Float -> Half + store size conversion (YMM).
defm : Zn4WriteResYMM<WriteCvtPS2PHYSt, [Zn4FPFCvt01, Zn4FPSt, Zn4Store], !add(6, Znver4Model.StoreLatency), [2, 1, 1], 3>;
// Float -> Half + store size conversion (ZMM).
// Instantiated through the ZMM multiclass so that it matches its non-store
// sibling WriteCvtPS2PHZ; latency, cycles and micro-op count are unchanged.
defm : Zn4WriteResZMM<WriteCvtPS2PHZSt, [Zn4FPFCvt01, Zn4FPSt, Zn4Store], !add(6, Znver4Model.StoreLatency), [2, 1, 1], 3>;

// CRC32 instruction.
defm : Zn4WriteResIntPair<WriteCRC32, [Zn4ALU1], 3, [1], 1>;

// SHA instructions.
// Each memory-operand ("rm") resource below is derived from its register
// ("rr") counterpart: the latency adds Znver4Model.LoadLatency, and the AGU
// and load resources are prepended to the resource list.
def Zn4WriteSHA1MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;

def Zn4WriteSHA1MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG1rr.Latency);
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = !add(Zn4WriteSHA1MSG1rr.NumMicroOps, 0);
}
def : InstRW<[Zn4WriteSHA1MSG1rm], (instrs SHA1MSG1rm)>;

def Zn4WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn4FPU0123]> {
  let Latency = 1;
  let ReleaseAtCycles = [2];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;

def Zn4Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps = !add(Zn4WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
}
def : InstRW<[Zn4Writerm_SHA1MSG2rm_SHA1NEXTErm], (instrs SHA1MSG2rm, SHA1NEXTErm)>;

def Zn4WriteSHA256MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
  let Latency = 2;
  let ReleaseAtCycles = [3];
  let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;

def Zn4Writerm_SHA256MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG1rr.Latency);
  let ReleaseAtCycles = [1, 1, 3];
  let NumMicroOps = !add(Zn4WriteSHA256MSG1rr.NumMicroOps, 0);
}
def : InstRW<[Zn4Writerm_SHA256MSG1rm], (instrs SHA256MSG1rm)>;

def Zn4WriteSHA256MSG2rr : SchedWriteRes<[Zn4FPU0123]> {
  let Latency = 3;
  let ReleaseAtCycles = [8];
  let NumMicroOps = 4;
}
def : InstRW<[Zn4WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;

// Unlike the other SHA memory forms, this one adds one micro-op on top of
// the register form.
def Zn4WriteSHA256MSG2rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
  let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG2rr.Latency);
  let ReleaseAtCycles = [1, 1, 8];
  let NumMicroOps = !add(Zn4WriteSHA256MSG2rr.NumMicroOps, 1);
}
def : InstRW<[Zn4WriteSHA256MSG2rm], (instrs SHA256MSG2rm)>;

def Zn4WriteSHA1RNDS4rri : SchedWriteRes<[Zn4FPU0123]> {
  let Latency = 6;
  let ReleaseAtCycles = [8];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteSHA1RNDS4rri], (instrs SHA1RNDS4rri)>;

def Zn4WriteSHA256RNDS2rr : SchedWriteRes<[Zn4FPU0123]> {
  let Latency = 4;
  let ReleaseAtCycles = [8];
  let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteSHA256RNDS2rr], (instrs SHA256RNDS2rr)>;

// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
defm : Zn4WriteResXMMPair<WritePCmpIStrM, [Zn4FPVAdd0123], 6, [8], 3, /*LoadUOps=*/1>;
// Packed Compare Explicit Length Strings, Return Mask
defm : Zn4WriteResXMMPair<WritePCmpEStrM, [Zn4FPVAdd0123], 6, [12], 7, /*LoadUOps=*/5>;
// Packed Compare Implicit Length Strings, Return Index
defm : Zn4WriteResXMMPair<WritePCmpIStrI, [Zn4FPVAdd0123], 2, [8], 4>;
// Packed Compare Explicit Length Strings, Return Index
defm : Zn4WriteResXMMPair<WritePCmpEStrI, [Zn4FPVAdd0123], 6, [12], 8, /*LoadUOps=*/4>;

// AES instructions.
defm : Zn4WriteResXMMPair<WriteAESDecEnc, [Zn4FPAES01], 4, [1], 1>; // Decryption, encryption.
// NOTE(review): the Zn4WriteRes* multiclasses are defined outside this
// excerpt; their positional arguments appear to be
// <write, resources, latency, release-at-cycles, micro-ops> - confirm
// against their definitions.

// AES InvMixColumn.
defm : Zn4WriteResXMMPair<WriteAESIMC, [Zn4FPAES01], 4, [1], 1>;
// AES Key Generation.
defm : Zn4WriteResXMMPair<WriteAESKeyGen, [Zn4FPAES01], 4, [1], 1>;

// Carry-less multiplication instructions.
defm : Zn4WriteResXMMPair<WriteCLMul, [Zn4FPCLM01], 4, [4], 4>;

// EMMS/FEMMS
// FIXME: latency not from llvm-exegesis
defm : Zn4WriteResInt<WriteEMMS, [Zn4ALU0123], 2, [1], 1>;

// Load/store MXCSR
// The load form is charged the model's LoadLatency plus one; the store form
// is charged one plus the model's StoreLatency.
// FIXME: latency not from llvm-exegesis
defm : Zn4WriteResInt<WriteLDMXCSR,
                      [Zn4AGU012, Zn4Load, Zn4ALU0123],
                      !add(Znver4Model.LoadLatency, 1),
                      [1, 1, 6], 1>;
// FIXME: latency not from llvm-exegesis
defm : Zn4WriteResInt<WriteSTMXCSR,
                      [Zn4ALU0123, Zn4AGU012, Zn4Store],
                      !add(1, Znver4Model.StoreLatency),
                      [60, 1, 1], 2>;

// Catch-all for expensive system instructions.
defm : Zn4WriteResInt<WriteSystem, [Zn4ALU0123], 100, [100], 100>;

// VZEROUPPER: one micro-op on Zn4FPU0123, modelled with zero latency.
def Zn4WriteVZEROUPPER : SchedWriteRes<[Zn4FPU0123]> {
  let Latency         = 0; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [1];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4WriteVZEROUPPER], (instrs VZEROUPPER)>;

// VZEROALL: 18 micro-ops, holding Zn4FPU0123 for 24 cycles.
def Zn4WriteVZEROALL : SchedWriteRes<[Zn4FPU0123]> {
  let Latency         = 10; // FIXME: not from llvm-exegesis
  let ReleaseAtCycles = [24];
  let NumMicroOps     = 18;
}
def : InstRW<[Zn4WriteVZEROALL], (instrs VZEROALL)>;

// AVX2.
// Fp 256-bit width vector shuffles.
defm : Zn4WriteResYMMPair<WriteFShuffle256, [Zn4FPVShuf],
                          2, [1], 1, /*LoadUOps=*/2>;
// Fp 256-bit width variable shuffles.
defm : Zn4WriteResYMMPair<WriteFVarShuffle256, [Zn4FPVShuf],
                          7, [1], 2, /*LoadUOps=*/1>;
// 256-bit width vector shuffles.
defm : Zn4WriteResYMMPair<WriteShuffle256, [Zn4FPVShuf],
                          1, [1], 1>;

// 256-bit lane/element permutes. Each register-form write below is followed
// by a load-folding write that additionally occupies Zn4AGU012 and Zn4Load;
// its latency is Znver4Model.LoadLatency plus the register form's latency,
// and its micro-op count is derived from the register form's via !add.

def Zn4WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn4FPVShuf]> {
  let Latency         = 3;
  let ReleaseAtCycles = [1];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4WriteVPERM2I128rr_VPERM2F128rr],
             (instrs VPERM2I128rr, VPERM2F128rr)>;

def Zn4WriteVPERM2F128rm
    : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
  let Latency         = !add(Znver4Model.LoadLatency,
                             Zn4WriteVPERM2I128rr_VPERM2F128rr.Latency);
  let ReleaseAtCycles = [1, 1, 1];
  let NumMicroOps     = !add(Zn4WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
}
def : InstRW<[Zn4WriteVPERM2F128rm], (instrs VPERM2F128rm)>;

def Zn4WriteVPERMPSYrr : SchedWriteRes<[Zn4FPVShuf]> {
  let Latency         = 7;
  let ReleaseAtCycles = [1];
  let NumMicroOps     = 2;
}
def : InstRW<[Zn4WriteVPERMPSYrr], (instrs VPERMPSYrr)>;

def Zn4WriteVPERMPSYrm
    : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
  let Latency         = !add(Znver4Model.LoadLatency,
                             Zn4WriteVPERMPSYrr.Latency);
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps     = !add(Zn4WriteVPERMPSYrr.NumMicroOps, 1);
}
def : InstRW<[Zn4WriteVPERMPSYrm], (instrs VPERMPSYrm)>;

def Zn4WriteVPERMYri : SchedWriteRes<[Zn4FPVShuf]> {
  let Latency         = 6;
  let ReleaseAtCycles = [1];
  let NumMicroOps     = 2;
}
def : InstRW<[Zn4WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;

def Zn4WriteVPERMPDYmi
    : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
  let Latency         = !add(Znver4Model.LoadLatency,
                             Zn4WriteVPERMYri.Latency);
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps     = !add(Zn4WriteVPERMYri.NumMicroOps, 1);
}
def : InstRW<[Zn4WriteVPERMPDYmi], (instrs VPERMPDYmi)>;

def Zn4WriteVPERMDYrr : SchedWriteRes<[Zn4FPVShuf]> {
  let Latency         = 5;
  let ReleaseAtCycles = [1];
  let NumMicroOps     = 2;
}
def : InstRW<[Zn4WriteVPERMDYrr], (instrs VPERMDYrr)>;

// Shared by VPERMQYmi and VPERMDYrm; derived from the VPERMDYrr write.
def Zn4WriteVPERMYm
    : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
  let Latency         = !add(Znver4Model.LoadLatency,
                             Zn4WriteVPERMDYrr.Latency);
  let ReleaseAtCycles = [1, 1, 2];
  let NumMicroOps     = !add(Zn4WriteVPERMDYrr.NumMicroOps, 0);
}
def : InstRW<[Zn4WriteVPERMYm], (instrs VPERMQYmi, VPERMDYrm)>;

// 256-bit width packed vector width-changing move.
defm : Zn4WriteResYMMPair<WriteVPMOV256, [Zn4FPVShuf01],
                          4, [3], 2, /*LoadUOps=*/-1>;
// 256-bit width vector variable shuffles.
defm : Zn4WriteResYMMPair<WriteVarShuffle256, [Zn4FPVShuf01],
                          1, [1], 2>;
// Variable vector shifts.
defm : Zn4WriteResXMMPair<WriteVarVecShift, [Zn4FPVShift01], 1, [1], 1>;
// Variable vector shifts (YMM).
defm : Zn4WriteResYMMPair<WriteVarVecShiftY, [Zn4FPVShift01], 1, [1], 1>;
// Variable vector shifts (ZMM).
defm : Zn4WriteResZMMPair<WriteVarVecShiftZ, [Zn4FPVShift01], 1, [2], 2>;

// Old microcoded instructions that nobody uses.
defm : Zn4WriteResInt<WriteMicrocoded, [Zn4ALU0123], 100, [100], 100>;

// Fence instructions.
defm : Zn4WriteResInt<WriteFence, [Zn4ALU0123], 1, [100], 1>;

// LFENCE: one micro-op that keeps Zn4LSU busy for 30 cycles.
def Zn4WriteLFENCE : SchedWriteRes<[Zn4LSU]> {
  let Latency         = 1;
  let ReleaseAtCycles = [30];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4WriteLFENCE], (instrs LFENCE)>;

// SFENCE: one micro-op that keeps Zn4LSU busy for a single cycle.
def Zn4WriteSFENCE : SchedWriteRes<[Zn4LSU]> {
  let Latency         = 1;
  let ReleaseAtCycles = [1];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4WriteSFENCE], (instrs SFENCE)>;

// Nop, not very useful except it provides a model for nops!
// FIXME: latency not from llvm-exegesis
defm : Zn4WriteResInt<WriteNop, [Zn4ALU0123], 0, [1], 1>;


///////////////////////////////////////////////////////////////////////////////
// Zero Cycle Move
///////////////////////////////////////////////////////////////////////////////

// A write that occupies no processor resource and has zero latency, but
// still counts as one micro-op.
def Zn4WriteZeroLatency : SchedWriteRes<[]> {
  let Latency         = 0;
  let ReleaseAtCycles = [];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4WriteZeroLatency], (instrs
    MOV32rr, MOV32rr_REV,
    MOV64rr, MOV64rr_REV,
    MOVSX32rr32
)>;

// Same as above (no resource, zero latency) but counted as two micro-ops;
// applied to the 32/64-bit register-register XCHG forms.
def Zn4WriteSwapRenameable : SchedWriteRes<[]> {
  let Latency         = 0;
  let ReleaseAtCycles = [];
  let NumMicroOps     = 2;
}
def : InstRW<[Zn4WriteSwapRenameable], (instrs
    XCHG32rr, XCHG32ar,
    XCHG64rr, XCHG64ar
)>;

// Compare+Exchange - TODO RMW support.
defm : Zn4WriteResInt<WriteXCHG, [Zn4ALU0123], 0, [8], 2>;

// Floating-point register moves: no resource, zero latency, one micro-op.
// NOTE(review): WriteFMoveZ (and WriteVecMoveZ below) are instantiated
// through Zn4WriteResYMM rather than a ZMM-specific multiclass - confirm
// this is intentional.
defm : Zn4WriteResXMM<WriteFMoveX, [], 0, [], 1>;
defm : Zn4WriteResYMM<WriteFMoveY, [], 0, [], 1>;
defm : Zn4WriteResYMM<WriteFMoveZ, [], 0, [], 1>;

// Vector integer register moves. The MMX form is the only one that is
// charged a resource (Zn4FPFMisc0123) and a non-zero latency.
defm : Zn4WriteResXMM<WriteVecMove,  [Zn4FPFMisc0123], 1, [1], 1>; // MMX
defm : Zn4WriteResXMM<WriteVecMoveX, [], 0, [], 1>;
defm : Zn4WriteResYMM<WriteVecMoveY, [], 0, [], 1>;
defm : Zn4WriteResYMM<WriteVecMoveZ, [], 0, [], 1>;

// Register moves declared optimizable for this model. A single equivalence
// class is used, guarded by TruePred.
def : IsOptimizableRegisterMove<[
  InstructionEquivalenceClass<[
    // GPR variants.
    MOV32rr,     MOV32rr_REV,
    MOV64rr,     MOV64rr_REV,
    MOVSX32rr32,
    XCHG32rr,    XCHG32ar,
    XCHG64rr,    XCHG64ar,

    // MMX variants.
    // MMX moves are *NOT* eliminated.

    // SSE variants.
    MOVAPSrr,    MOVAPSrr_REV,
    MOVUPSrr,    MOVUPSrr_REV,
    MOVAPDrr,    MOVAPDrr_REV,
    MOVUPDrr,    MOVUPDrr_REV,
    MOVDQArr,    MOVDQArr_REV,
    MOVDQUrr,    MOVDQUrr_REV,

    // AVX variants.
    VMOVAPSrr,   VMOVAPSrr_REV,
    VMOVUPSrr,   VMOVUPSrr_REV,
    VMOVAPDrr,   VMOVAPDrr_REV,
    VMOVUPDrr,   VMOVUPDrr_REV,
    VMOVDQArr,   VMOVDQArr_REV,
    VMOVDQUrr,   VMOVDQUrr_REV,

    // AVX YMM variants.
    VMOVAPSYrr,  VMOVAPSYrr_REV,
    VMOVUPSYrr,  VMOVUPSYrr_REV,
    VMOVAPDYrr,  VMOVAPDYrr_REV,
    VMOVUPDYrr,  VMOVUPDYrr_REV,
    VMOVDQAYrr,  VMOVDQAYrr_REV,
    VMOVDQUYrr,  VMOVDQUYrr_REV,
  ], TruePred >
]>;

// FIXUP and RANGE Instructions
def Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr : SchedWriteRes<[Zn4FPFMisc01]> {
  let Latency         = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr], (instregex
    "VFIXUPIMM(S|P)(S|D)(Z|Z128|Z256?)rrik",
    "VFIXUPIMM(S|P)(S|D)(Z?|Z128?|Z256?)rrikz",
    "VFIXUPIMM(S|P)(S|D)(Z128|Z256?)rri",
    "VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)",
    "VRANGE(S|P)(S|D)(Z|Z128|Z256?)rri(b?)k",
    "VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)kz"
)>;

// SCALE & REDUCE instructions
def Zn4WriteSCALErr : SchedWriteRes<[Zn4FPFMisc23]> {
  let Latency         = 6;
  let ReleaseAtCycles = [6];
  let NumMicroOps     = 2;
}
def : InstRW<[Zn4WriteSCALErr], (instregex
    "V(SCALEF|REDUCE)(S|P)(S|D)(Z?|Z128?|Z256?)(rr|rrb|rrkz|rrik|rrikz|rri)(_Int?|_Intkz?)",
    "(V?)REDUCE(PD|PS|SD|SS)(Z?|Z128?)(rri|rrikz|rrib)"
)>;

// BF16PS Instructions
def Zn4WriteBF16 : SchedWriteRes<[Zn4FPFMisc23]> {
  let Latency         = 6;
  let ReleaseAtCycles = [6];
  let NumMicroOps     = 2;
}
def : InstRW<[Zn4WriteBF16], (instregex
    "(V?)DPBF16PS(Z?|Z128?|Z256?)(r|rk|rkz)"
)>;

// BUSD and VPMADD Instructions
def Zn4WriteBUSDr_VPMADDr : SchedWriteRes<[Zn4FPFMisc01]> {
  let Latency         = 4;
  let ReleaseAtCycles = [4];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4WriteBUSDr_VPMADDr], (instregex
    "VPDP(BU|WS)(S|P)(S|D|DS)(Z|Z128|Z256)(r|rk|rkz)",
    "VPMADD52(H|L)UQ(Z|Z128|Z256)(r|rk|rkz)"
)>;

// SHIFT instructions
// NOTE(review): the last pattern line also routes VPSHUFBITQMBZ128rr and
// VFMSUB231SSZr_Intkz to this shift write - confirm that is intended.
def Zn4WriteSHIFTrr : SchedWriteRes<[Zn4FPFMisc01]> {
  let Latency         = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4WriteSHIFTrr], (instregex
    "VP(LZCNT|SHLD|SHRD?)(D|Q|W|VD|VQ|VW?)(Z?|Z128?|Z256?)(rr|rk|rrk|rrkz|rri|rrik|rrikz)",
    "(V?)P(SLL|SRL|SRA)(D|Q|W|DQ)(Y?|Z?|Z128?|Z256?)(rr|rrk|rrkz)",
    "(V?)P(SLL|SRL|SRA)DQYri",
    "(V?)P(SLL|SRL)DQ(Z?|Z256?)ri",
    "(V?)P(SHUFB)(Y|Z|Z128|Z256?)(rr|rrk|rrkz)",
    "(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z?|Z128?|Z256?)(rr|rrk|rrkz)",
    "(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z256?)(ri|rik|rikz)",
    "(V?)P(ROL|ROR)(D|Q)(Z?|Z128?)(ri|rik|rikz)",
    "VPSHUFBITQMBZ128rr",
    "VFMSUB231SSZr_Intkz"
)>;

// Immediate-count shifts.
def Zn4WriteSHIFTri : SchedWriteRes<[Zn4FPFMisc01]> {
  let Latency         = 1;
  let ReleaseAtCycles = [1];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4WriteSHIFTri], (instregex
    "VP(SLL|SRL|SRA)(D|Q|W)(Z|Z128|Z256?)(ri|rik|rikz)"
)>;

// ALIGN Instructions
def Zn4WriteALIGN : SchedWriteRes<[Zn4FPFMisc12]> {
  let Latency         = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4WriteALIGN], (instregex
    "(V?)PALIGNR(Z?|Z128?|Z256?)(rri|rrik|rrikz)"
)>;

// PACK Instructions
def Zn4WritePACK : SchedWriteRes<[Zn4FPFMisc12]> {
  let Latency         = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4WritePACK], (instregex
    "(V?)PACK(SS|US)(DW|WB)(Z?|Z128?|Z256?)(rr|rrk|rrkz)"
)>;

// MAX and MIN Instructions
def Zn4WriteFCmp64 : SchedWriteRes<[Zn4FPFMisc01]> {
  let Latency         = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4WriteFCmp64], (instregex
    "(V?)CMP(S|P)(S|D)(rr|rri|rr_Int)",
    "(V?|VP?)(MAX|MIN|MINC|MAXC)(S|P|U)(S|D|Q)(Z?|Z128?|Z256?)(rr|rri|rrk|rrkz)(_Int?)",
    "VP(MAX|MIN)(SQ|UQ)(Z|Z128|Z256)(rr|rrk|rrkz)",
    "(V?)(MAX|MAXC|MIN|MINC)PD(Z|Z128|Z256?)(rr|rrk|rrkz)"
)>;

// MOV Instructions
def Zn4MOVDUPZ : SchedWriteRes<[Zn4FPFMisc12]> {
  let Latency         = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4MOVDUPZ], (instregex
    "(V?)VMOVDDUP(Z|Z128|Z256)(rr|rrk|rrkz)"
)>;

def Zn4MOVS : SchedWriteRes<[Zn4FPFMisc12]> {
  let Latency         = 2;
  let ReleaseAtCycles = [1];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4MOVS], (instregex
    "(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)(Z128?|Z256?)(rr|rrk|rrkz)",
    "(V?)PMOV(SX|QD|UZ|ZX)(BD|BQ|BW?)(Y|Z128?)(rr|rrk|rrkz)",
    "(V?)PMOV(SX|US|ZX)(DQ|WD|QW|WQ?)(Y|Z128?)(rr|rrk|rrkz)",
    "VPMOV(DB|DW|QB|QD|QW|SDB|SDW|SQB|SQD|SQW|SWB|USDB|USDW|USQB|USQD|USWB|WB)(Z128?|Z256?)(rr|rrk|rrkz)"
)>;

def Zn4MOVSZ : SchedWriteRes<[Zn4FPFMisc12]> {
  let Latency         = 4;
  let ReleaseAtCycles = [4];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4MOVSZ], (instregex
    "(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)(Z?)(rr|rrk|rrkz)"
)>;

def Zn4MOVSrr : SchedWriteRes<[Zn4FPFMisc12]> {
  let Latency         = 5;
  let ReleaseAtCycles = [5];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4MOVSrr], (instregex
    "(V?)PMOV(DB|QB|QW|SDB|SQB|SQW|USDB|USQB|USQW)(Z?)(rr|rrk|rrkz)"
)>;


// VPTEST Instructions
def Zn4VPTESTZ128 : SchedWriteRes<[Zn4FPFMisc01]> {
  let Latency         = 3;
  let ReleaseAtCycles = [3];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4VPTESTZ128], (instregex
    "(V?)PTEST(N?)(MB|MD|MQ|MW)(Z128?)(rrk)"
)>;

def Zn4VPTESTZ256 : SchedWriteRes<[Zn4FPFMisc01]> {
  let Latency         = 4;
  let ReleaseAtCycles = [4];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4VPTESTZ256], (instregex
    "(V?)PTEST(N?)(MB|MD|MQ|MW)(Z256?)(rr|rrk)"
)>;

def Zn4VPTESTZ : SchedWriteRes<[Zn4FPFMisc01]> {
  let Latency         = 5;
  let ReleaseAtCycles = [5];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4VPTESTZ], (instregex
    "(V?)PTEST(N?)(MB|MD|MQ|MW)(Z?)(rrk)"
)>;

// CONFLICT Instructions
def Zn4CONFLICTZ128 : SchedWriteRes<[Zn4FPFMisc01]> {
  let Latency         = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4CONFLICTZ128], (instregex
    "VPCONFLICT(D|Q)(Z128)(rr|rrk|rrkz)"
)>;

// Wider forms: four micro-ops, two cycles on each of three resource groups.
def Zn4CONFLICTrr
    : SchedWriteRes<[Zn4FPFMisc01, Zn4FPFMisc12, Zn4FPFMisc23]> {
  let Latency         = 6;
  let ReleaseAtCycles = [2, 2, 2];
  let NumMicroOps     = 4;
}
def : InstRW<[Zn4CONFLICTrr], (instregex
    "VPCONFLICT(D|Q)(Z|Z256)(rr|rrkz)"
)>;

// RSQRT Instructions
def Zn4VRSQRT14PDZ256 : SchedWriteRes<[Zn4FPFMisc01]> {
  let Latency         = 5;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4VRSQRT14PDZ256], (instregex
    "VRSQRT14(PD|PS)(Z?|Z128?|Z256?)(r|rr|rk|rrk|rkz|rrkz)"
)>;


// PERM Instructions
def Zn4PERMILP : SchedWriteRes<[Zn4FPFMisc123]> {
  let Latency         = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4PERMILP], (instregex
    "VPERMILP(S|D)(Y|Z|Z128|Z256)(rr|rrk|rrkz)"
)>;

def Zn4PERMIT2_128 : SchedWriteRes<[Zn4FPFMisc12]> {
  let Latency         = 3;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4PERMIT2_128], (instregex
    "VPERM(I2|T2)(PS|PD|W)Z128(rr|rrk|rrkz)",
    "VPERM(I2|T2)(B|D|Q)Z128(rr|rrk|rrkz)"
)>;

def Zn4PERMIT2_128rr : SchedWriteRes<[Zn4FPFMisc12]> {
  let Latency         = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4PERMIT2_128rr], (instregex
    "V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z128(rr|rrk|rrkz)",
    "VPERM(B|D|Q|W)(Z128?)(rr|rrk|rrkz)"
)>;

def Zn4PERMIT2_256 : SchedWriteRes<[Zn4FPFMisc12]> {
  let Latency         = 4;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4PERMIT2_256], (instregex
    "VPERM(I2|T2)(PS|PD|W)Z256(rr|rrk|rrkz)",
    "VPERMP(S|D)Z256(rr|rrk|rrkz)",
    "V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z256(rr|rrk|rrkz)",
    "VPERM(B|D|Q|W)Z256(rr|rrk|rrkz)",
    "VPERM(I2|Q|T2)(B|D|Q)Z256(rr|rrk|rrkz)",
    "VPEXPAND(B|W)Z256(rr|rrk|rrkz)"
)>;

def Zn4PERMIT2Z : SchedWriteRes<[Zn4FPFMisc12]> {
  let Latency         = 5;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4PERMIT2Z], (instregex
    "VPERM(I2|T2)(PS|PD|W)Z(rr|rrk|rrkz)",
    "VPERM(B|D|W)Z(rr|rrk|rrkz)",
    "VPERM(I2|Q|T2)(B|D|Q)Z(rr|rrk|rrkz)",
    "V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z(rr|rrk|rrkz)",
    "VPEXPAND(B|W)Z(rr|rrk|rrkz)",
    "VPERMP(S|D)Z(rr|rrk|rrkz)"
)>;

// ALU SLOW Misc Instructions
// The list below is grouped one mnemonic per row: rr, rrk, rrkz.
def Zn4VecALUZSlow : SchedWriteRes<[Zn4FPFMisc01]> {
  let Latency         = 2;
  let ReleaseAtCycles = [2];
  let NumMicroOps     = 1;
}
def : InstRW<[Zn4VecALUZSlow], (instrs
    VPABSBZ128rr,    VPABSBZ128rrk,    VPABSBZ128rrkz,
    VPABSDZ128rr,    VPABSDZ128rrk,    VPABSDZ128rrkz,
    VPABSQZ128rr,    VPABSQZ128rrk,    VPABSQZ128rrkz,
    VPABSWZ128rr,    VPABSWZ128rrk,    VPABSWZ128rrkz,
    VPADDSBZ128rr,   VPADDSBZ128rrk,   VPADDSBZ128rrkz,
    VPADDSWZ128rr,   VPADDSWZ128rrk,   VPADDSWZ128rrkz,
    VPADDUSBZ128rr,  VPADDUSBZ128rrk,  VPADDUSBZ128rrkz,
    VPADDUSWZ128rr,  VPADDUSWZ128rrk,  VPADDUSWZ128rrkz,
    VPAVGBZ128rr,    VPAVGBZ128rrk,    VPAVGBZ128rrkz,
    VPAVGWZ128rr,    VPAVGWZ128rrk,    VPAVGWZ128rrkz,
    VPOPCNTBZ128rr,  VPOPCNTBZ128rrk,  VPOPCNTBZ128rrkz,
    VPOPCNTDZ128rr,  VPOPCNTDZ128rrk,  VPOPCNTDZ128rrkz,
    VPOPCNTQZ128rr,  VPOPCNTQZ128rrk,  VPOPCNTQZ128rrkz,
    VPOPCNTWZ128rr,  VPOPCNTWZ128rrk,  VPOPCNTWZ128rrkz,
    VPSUBSBZ128rr,   VPSUBSBZ128rrk,   VPSUBSBZ128rrkz,
    VPSUBSWZ128rr,   VPSUBSWZ128rrk,   VPSUBSWZ128rrkz,
    VPSUBUSBZ128rr,  VPSUBUSBZ128rrk,  VPSUBUSBZ128rrkz,
    VPSUBUSWZ128rr,  VPSUBUSWZ128rrk,  VPSUBUSWZ128rrkz
)>;


///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////

// Each variant below selects Zn4WriteZeroLatency when its predicate holds
// and falls back to the regular write class otherwise.

// GPR XOR/SUB: zero-latency when ZeroIdiomPredicate matches, else WriteALU.
def Zn4WriteZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [WriteALU]>
]>;
def : InstRW<[Zn4WriteZeroIdiom], (instrs
    XOR32rr, XOR32rr_REV,
    XOR64rr, XOR64rr_REV,
    SUB32rr, SUB32rr_REV,
    SUB64rr, SUB64rr_REV
)>;

// GPR CMP: zero-latency when operands 0 and 1 are the same register
// (CheckSameRegOperand<0, 1>), else WriteALU.
def Zn4WriteZeroIdiomEFLAGS : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<CheckSameRegOperand<0, 1>>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred,                                 [WriteALU]>
]>;
def : InstRW<[Zn4WriteZeroIdiomEFLAGS], (instrs
    CMP8rr,  CMP8rr_REV,
    CMP16rr, CMP16rr_REV,
    CMP32rr, CMP32rr_REV,
    CMP64rr, CMP64rr_REV
)>;

// FP logic (XMM): zero-latency when ZeroIdiomPredicate matches, else
// WriteFLogic.
def Zn4WriteFZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [WriteFLogic]>
]>;
// NOTE: XORPSrr, XORPDrr are not zero-cycle!
def : InstRW<[Zn4WriteFZeroIdiom], (instrs
    VXORPSrr,  VXORPDrr,
    VANDNPSrr, VANDNPDrr
)>;

// FP logic (YMM): zero-latency when ZeroIdiomPredicate matches, else
// WriteFLogicY.
def Zn4WriteFZeroIdiomY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [WriteFLogicY]>
]>;
def : InstRW<[Zn4WriteFZeroIdiomY], (instrs
    VXORPSYrr,  VXORPDYrr,
    VANDNPSYrr, VANDNPDYrr
)>;

// Vector integer logic (XMM): falls back to WriteVecLogicX.
def Zn4WriteVZeroIdiomLogicX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [WriteVecLogicX]>
]>;
// NOTE: PXORrr,PANDNrr are not zero-cycle!
def : InstRW<[Zn4WriteVZeroIdiomLogicX], (instrs VPXORrr, VPANDNrr)>;

// Vector integer logic (YMM): falls back to WriteVecLogicY.
def Zn4WriteVZeroIdiomLogicY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [WriteVecLogicY]>
]>;
def : InstRW<[Zn4WriteVZeroIdiomLogicY], (instrs VPXORYrr, VPANDNYrr)>;

// Vector integer ALU (XMM): falls back to WriteVecALUX.
def Zn4WriteVZeroIdiomALUX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [WriteVecALUX]>
]>;
// NOTE: PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
//       PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr are not zero-cycle!
def : InstRW<[Zn4WriteVZeroIdiomALUX], (instrs
    VPSUBBrr,   VPSUBWrr,   VPSUBDrr,   VPSUBQrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr
)>;

// Vector integer ALU (YMM): zero-latency when ZeroIdiomPredicate matches,
// else WriteVecALUY.
def Zn4WriteVZeroIdiomALUY : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
  SchedVar<NoSchedPred,                          [WriteVecALUY]>
]>;
def : InstRW<[Zn4WriteVZeroIdiomALUY], (instrs
    VPSUBBYrr,   VPSUBWYrr,   VPSUBDYrr,   VPSUBQYrr,
    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
)>;

// Instructions declared as zero idioms, all guarded by ZeroIdiomPredicate.
// NOTE(review): the saturating-subtract forms (PSUBS*/PSUBUS* and their
// VEX/YMM counterparts) are listed here, but they do not appear in the
// InstRW lists for Zn4WriteVZeroIdiomALUX / Zn4WriteVZeroIdiomALUY above -
// confirm whether that difference is intentional.
def : IsZeroIdiomFunction<[
  // GPR Zero-idioms.
  DepBreakingClass<[
    XOR32rr, XOR32rr_REV,
    XOR64rr, XOR64rr_REV,
    SUB32rr, SUB32rr_REV,
    SUB64rr, SUB64rr_REV
  ], ZeroIdiomPredicate>,

  // SSE XMM Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    XORPSrr,   XORPDrr,
    ANDNPSrr,  ANDNPDrr,

    // int variants.
    PXORrr,
    PANDNrr,
    PSUBBrr,   PSUBWrr,   PSUBDrr,   PSUBQrr,
    PSUBSBrr,  PSUBSWrr,
    PSUBUSBrr, PSUBUSWrr,
    PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr
  ], ZeroIdiomPredicate>,

  // AVX XMM Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    VXORPSrr,   VXORPDrr,
    VANDNPSrr,  VANDNPDrr,

    // int variants.
    VPXORrr,
    VPANDNrr,
    VPSUBBrr,   VPSUBWrr,   VPSUBDrr,   VPSUBQrr,
    VPSUBSBrr,  VPSUBSWrr,
    VPSUBUSBrr, VPSUBUSWrr,
    VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
  ], ZeroIdiomPredicate>,

  // AVX YMM Zero-idioms.
  DepBreakingClass<[
    // fp variants.
    VXORPSYrr,   VXORPDYrr,
    VANDNPSYrr,  VANDNPDYrr,

    // int variants.
    VPXORYrr,
    VPANDNYrr,
    VPSUBBYrr,   VPSUBWYrr,   VPSUBDYrr,   VPSUBQYrr,
    VPSUBSBYrr,  VPSUBSWYrr,
    VPSUBUSBYrr, VPSUBUSWYrr,
    VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
  ], ZeroIdiomPredicate>,
]>;

// Instructions declared dependency-breaking. The CMP class is guarded by
// CheckSameRegOperand<0, 1>; every other class uses ZeroIdiomPredicate.
def : IsDepBreakingFunction<[
  // GPR
  DepBreakingClass<[
    SBB32rr, SBB32rr_REV,
    SBB64rr, SBB64rr_REV
  ], ZeroIdiomPredicate>,
  DepBreakingClass<[
    CMP8rr,  CMP8rr_REV,
    CMP16rr, CMP16rr_REV,
    CMP32rr, CMP32rr_REV,
    CMP64rr, CMP64rr_REV
  ], CheckSameRegOperand<0, 1> >,
  // SSE
  DepBreakingClass<[
    PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
  ], ZeroIdiomPredicate>,

  // AVX XMM
  DepBreakingClass<[
    VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
  ], ZeroIdiomPredicate>,

  // AVX YMM
  DepBreakingClass<[
    VPCMPEQBYrr, VPCMPEQWYrr, VPCMPEQDYrr, VPCMPEQQYrr
  ], ZeroIdiomPredicate>,
]>;

} // SchedModel