//=- AArch64SchedCyclone.td - Cyclone Scheduling Definitions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AArch64 Cyclone to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// Top-level machine model record for Cyclone. Every resource and latency
// definition that is bound to this model refers back to it via SchedModel.
def CycloneModel : SchedMachineModel {
  // 6 micro-ops are dispatched per cycle.
  let IssueWidth = 6;
  // Based on the reorder buffer.
  let MicroOpBufferSize = 192;
  // Optimistic load latency.
  let LoadLatency = 4;
  // 14-19 cycles are typical.
  let MispredictPenalty = 16;
  let CompleteModel = 1;

  // Predicates for features this model does not describe.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SVEUnsupported.F,
                  PAUnsupported.F,
                  SMEUnsupported.F,
                  [HasMTE, HasCSSC]);
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Cyclone.

// 4 integer pipes
def CyUnitI : ProcResource<4> { let BufferSize = 48; }

// 2 branch units: I[0..1]
def CyUnitB : ProcResource<2> {
  let Super      = CyUnitI;
  let BufferSize = 24;
}

// 1 indirect-branch unit: I[0]
def CyUnitBR : ProcResource<1> { let Super = CyUnitB; }

// 2 shifter pipes: I[2..3]
// When an instruction consumes a CyUnitIS, it also consumes a CyUnitI
def CyUnitIS : ProcResource<2> {
  let Super      = CyUnitI;
  let BufferSize = 24;
}

// 1 mul pipe: I[0]
def CyUnitIM : ProcResource<1> {
  let Super      = CyUnitBR;
  let BufferSize = 32;
}

// 1 div pipe: I[1]
def CyUnitID : ProcResource<1> {
  let Super      = CyUnitB;
  let BufferSize = 16;
}

// 1 integer division unit. This is driven by the ID pipe, but only
// consumes the pipe for one cycle at issue and another cycle at writeback.
def CyUnitIntDiv : ProcResource<1>;

// 2 ld/st pipes.
def CyUnitLS : ProcResource<2> { let BufferSize = 28; }

// 3 fp/vector pipes.
def CyUnitV : ProcResource<3> { let BufferSize = 48; }

// 2 fp/vector arithmetic and multiply pipes: V[0-1]
def CyUnitVM : ProcResource<2> {
  let Super      = CyUnitV;
  let BufferSize = 32;
}

// 1 fp/vector division/sqrt pipe: V[2]
def CyUnitVD : ProcResource<1> {
  let Super      = CyUnitV;
  let BufferSize = 16;
}

// 1 fp compare pipe: V[0]
def CyUnitVC : ProcResource<1> {
  let Super      = CyUnitVM;
  let BufferSize = 16;
}

// 2 fp division/square-root units. These are driven by the VD pipe,
// but only consume the pipe for one cycle at issue and a cycle at writeback.
def CyUnitFloatDiv : ProcResource<2>;

//===----------------------------------------------------------------------===//
// Define scheduler read/write resources and latency on Cyclone.
// This mirrors sections 7.7-7.9 of the Tuning Guide v1.0.1.

// Everything from here on is bound to CycloneModel; the scope opened below
// is closed later in the file.
let SchedModel = CycloneModel in {

//---
// 7.8.1. Moves
//---

// A single nop micro-op (uX).
def WriteX : SchedWriteRes<[]> {
  let Latency = 0;
}

// Move zero is a register rename (to machine register zero).
// The move is replaced by a single nop micro-op.
// MOVZ Rd, #0
// AND Rd, Rzr, #imm
def WriteZPred : SchedPredicate<[{TII->isGPRZero(*MI)}]>;
def WriteImmZ : SchedWriteVariant<[
  SchedVar<WriteZPred,  [WriteX]>,
  SchedVar<NoSchedPred, [WriteImm]>
]>;
def : InstRW<[WriteImmZ], (instrs MOVZWi, MOVZXi, ANDWri, ANDXri)>;

// Move GPR is a register rename and single nop micro-op.
// ORR Xd, XZR, Xm
// ADD Xd, Xn, #0
// NOTE(review): the example above is the immediate form of ADD, while the
// InstRW below lists ADDXrr -- confirm the instruction list is as intended.
def WriteIMovPred : SchedPredicate<[{TII->isGPRCopy(*MI)}]>;
def WriteVMovPred : SchedPredicate<[{TII->isFPRCopy(*MI)}]>;
def WriteMov : SchedWriteVariant<[
  SchedVar<WriteIMovPred, [WriteX]>,
  SchedVar<WriteVMovPred, [WriteX]>,
  SchedVar<NoSchedPred,   [WriteI]>
]>;
def : InstRW<[WriteMov], (instrs COPY, ORRXrr, ADDXrr)>;

// Move non-zero immediate is an integer ALU op.
// MOVN,MOVZ,MOVK
def : WriteRes<WriteImm, [CyUnitI]>;

//---
// 7.8.2-7.8.5.
// Arithmetic and Logical, Comparison, Conditional,
// Shifts and Bitfield Operations
//---

// ADR,ADRP
// ADD(S)ri,SUB(S)ri,AND(S)ri,EORri,ORRri
// ADD(S)rr,SUB(S)rr,AND(S)rr,BIC(S)rr,EONrr,EORrr,ORNrr,ORRrr
// ADC(S),SBC(S)
// Aliases: CMN, CMP, TST
//
// Conditional operations.
// CCMNi,CCMPi,CCMNr,CCMPr,
// CSEL,CSINC,CSINV,CSNEG
//
// Bit counting and reversal operations.
// CLS,CLZ,RBIT,REV,REV16,REV32
def : WriteRes<WriteI, [CyUnitI]>;

// ADD with shifted register operand is a single micro-op that
// consumes a shift pipeline for two cycles.
// ADD(S)rs,SUB(S)rs,AND(S)rs,BIC(S)rs,EONrs,EORrs,ORNrs,ORRrs
// EXAMPLE: ADDrs Xn, Xm LSL #imm
def : WriteRes<WriteISReg, [CyUnitIS]> {
  let Latency         = 2;
  let ReleaseAtCycles = [2];
}

// ADD with extended register operand is the same as shifted reg operand.
// ADD(S)re,SUB(S)re
// EXAMPLE: ADDXre Xn, Xm, UXTB #1
def : WriteRes<WriteIEReg, [CyUnitIS]> {
  let Latency         = 2;
  let ReleaseAtCycles = [2];
}

// Variable shift and bitfield operations.
// ASRV,LSLV,LSRV,RORV,BFM,SBFM,UBFM
def : WriteRes<WriteIS, [CyUnitIS]>;

// EXTR Shifts a pair of registers and requires two micro-ops.
// The second micro-op is delayed, as modeled by ReadExtrHi.
// EXTR Xn, Xm, #imm
def : WriteRes<WriteExtr, [CyUnitIS, CyUnitIS]> {
  let Latency     = 2;
  let NumMicroOps = 2;
}

// EXTR's first register read is delayed by one cycle, effectively
// shortening its writer's latency.
// EXTR Xn, Xm, #imm
def : ReadAdvance<ReadExtrHi, 1>;

//---
// 7.8.6. Multiplies
//---

// MUL/MNEG are aliases for MADD/MSUB.
// MADDW,MSUBW,SMADDL,SMSUBL,UMADDL,UMSUBL
def : WriteRes<WriteIM32, [CyUnitIM]> { let Latency = 4; }
// MADDX,MSUBX,SMULH,UMULH
def : WriteRes<WriteIM64, [CyUnitIM]> { let Latency = 5; }

//---
// 7.8.7. Divide
//---

// 32-bit divide takes 7-13 cycles. 10 cycles covers a 20-bit quotient.
// The ID pipe is consumed for 2 cycles: issue and writeback.
// SDIVW,UDIVW
def : WriteRes<WriteID32, [CyUnitID, CyUnitIntDiv]> {
  let Latency         = 10;
  let ReleaseAtCycles = [2, 10];
}
// 64-bit divide takes 7-21 cycles. 13 cycles covers a 32-bit quotient.
// The ID pipe is consumed for 2 cycles: issue and writeback.
// SDIVX,UDIVX
def : WriteRes<WriteID64, [CyUnitID, CyUnitIntDiv]> {
  let Latency         = 13;
  let ReleaseAtCycles = [2, 13];
}

//---
// 7.8.8,7.8.10. Load/Store, single element
//---

// Integer loads take 4 cycles and use one LS unit for one cycle.
def : WriteRes<WriteLD, [CyUnitLS]> { let Latency = 4; }

// Store-load forwarding is 4 cycles.
//
// Note: The store-exclusive sequence incorporates this
// latency. However, general heuristics should not model the
// dependence between a store and subsequent may-alias load because
// hardware speculation works.
def : WriteRes<WriteST, [CyUnitLS]> { let Latency = 4; }

// Load from base address plus an optionally scaled register offset.
// Rt latency is latency WriteIS + WriteLD.
// EXAMPLE: LDR Xn, Xm [, lsl 3]
def CyWriteLDIdx : SchedWriteVariant<[
  // Load from scaled register.
  SchedVar<ScaledIdxPred, [WriteIS, WriteLD]>,
  // Load from register offset.
  SchedVar<NoSchedPred,   [WriteLD]>
]>;
// Map AArch64->Cyclone type.
def : SchedAlias<WriteLDIdx, CyWriteLDIdx>;

// EXAMPLE: STR Xn, Xm [, lsl 3]
def CyWriteSTIdx : SchedWriteVariant<[
  // Store to scaled register.
  SchedVar<ScaledIdxPred, [WriteIS, WriteST]>,
  // Store to register offset.
  SchedVar<NoSchedPred,   [WriteST]>
]>;
// Map AArch64->Cyclone type.
def : SchedAlias<WriteSTIdx, CyWriteSTIdx>;

// Read the (unshifted) base register Xn in the second micro-op one cycle later.
// EXAMPLE: LDR Xn, Xm [, lsl 3]
def ReadBaseRS : SchedReadAdvance<1>;
def CyReadAdrBase : SchedReadVariant<[
  // Read base reg after shifting offset.
  SchedVar<ScaledIdxPred, [ReadBaseRS]>,
  // Read base reg with no shift.
  SchedVar<NoSchedPred,   [ReadDefault]>
]>;
// Map AArch64->Cyclone type.
def : SchedAlias<ReadAdrBase, CyReadAdrBase>;
def : ReadAdvance<ReadST, 0>;

//---
// 7.8.9,7.8.11. Load/Store, paired
//---

// Address pre/post increment is a simple ALU op with one cycle latency.
def : WriteRes<WriteAdr, [CyUnitI]>;

// LDP high register write is fused with the load, but a nop micro-op remains.
def : WriteRes<WriteLDHi, []> { let Latency = 4; }

// STP is a vector op and store, except for QQ, which is just two stores.
def : SchedAlias<WriteSTP, WriteVSTShuffle>;
def : InstRW<[WriteST, WriteST], (instrs STPQi)>;

//---
// 7.8.13. Branches
//---

// Branches take a single micro-op.
// The misprediction penalty is defined as a SchedMachineModel property.
let Latency = 0 in {
  def : WriteRes<WriteBr,    [CyUnitB]>;
  def : WriteRes<WriteBrReg, [CyUnitBR]>;
}

//---
// 7.8.14. Never-issued Instructions, Barrier and Hint Operations
//---

// NOP,SEV,SEVL,WFE,WFI,YIELD
def : WriteRes<WriteHint, []> { let Latency = 0; }
// ISB
def : InstRW<[WriteI], (instrs ISB)>;
// CLREX,DMB,DSB
def : WriteRes<WriteBarrier, [CyUnitLS]>;

// System instructions get an invalid latency because the latency of
// other operations across them is meaningless.
def : WriteRes<WriteSys, []> { let Latency = -1; }

//===----------------------------------------------------------------------===//
// 7.9 Vector Unit Instructions

// Simple vector operations take 2 cycles.
let Latency = 2 in {
  def : WriteRes<WriteVd, [CyUnitV]>;
  def : WriteRes<WriteVq, [CyUnitV]>;
}

// Define some longer latency vector op types for Cyclone.
def CyWriteV3 : SchedWriteRes<[CyUnitV]> { let Latency = 3; }
def CyWriteV4 : SchedWriteRes<[CyUnitV]> { let Latency = 4; }
def CyWriteV5 : SchedWriteRes<[CyUnitV]> { let Latency = 5; }
def CyWriteV6 : SchedWriteRes<[CyUnitV]> { let Latency = 6; }

// Simple floating-point operations take 2 cycles.
def : WriteRes<WriteF, [CyUnitV]> { let Latency = 2; }

//---
// 7.9.1 Vector Moves
//---

// TODO: Add Cyclone-specific zero-cycle zeros.
// LLVM currently
// generates expensive int-float conversion instead:
// FMOVDi Dd, #0.0
// FMOVv2f64ns Vd.2d, #0.0

// FMOVSi,FMOVDi
def : WriteRes<WriteFImm, [CyUnitV]> { let Latency = 2; }

// MOVI,MVNI are WriteV
// FMOVv2f32ns,FMOVv2f64ns,FMOVv4f32ns are WriteV

// Move FPR is a register rename and single nop micro-op.
// ORR.16b Vd,Vn,Vn
// COPY is handled above in the WriteMov Variant.
def WriteVMov : SchedWriteVariant<[
  SchedVar<WriteVMovPred, [WriteX]>,
  SchedVar<NoSchedPred,   [WriteVq]>
]>;
def : InstRW<[WriteVMov], (instrs ORRv16i8)>;

// FMOVSr,FMOVDr are WriteF.

// MOV V,V is a WriteV.

// CPY D,V[x] is a WriteV

// INS V[x],V[y] is a WriteV.

// FMOVWSr,FMOVXDr,FMOVXDHighr
def : WriteRes<WriteFCopy, [CyUnitLS]> { let Latency = 5; }

// FMOVSWr,FMOVDXr
def : InstRW<[WriteLD], (instrs FMOVSWr, FMOVDXr, FMOVDXHighr)>;

// INS V[x],R
def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteVq]>;
def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;

// SMOV,UMOV R,V[x]
def CyWriteCopyToGPR : WriteSequence<[WriteLD, WriteI]>;
def : InstRW<[CyWriteCopyToGPR], (instregex "SMOVv", "UMOVv")>;

// DUP V,R
def : InstRW<[CyWriteCopyToFPR], (instregex "DUPv")>;

// DUP V,V[x] is a WriteV.

//---
// 7.9.2 Integer Arithmetic, Logical, and Comparisons
//---

// BIC,ORR V,#imm are WriteV

def : InstRW<[CyWriteV3], (instregex "ABSv")>;

// MVN,NEG,NOT are WriteV

def : InstRW<[CyWriteV3], (instregex "SQABSv", "SQNEGv")>;

// ADDP is a WriteV.
def CyWriteVADDLP : SchedWriteRes<[CyUnitV]> { let Latency = 2; }
def : InstRW<[CyWriteVADDLP], (instregex "SADDLPv", "UADDLPv")>;

def : InstRW<[CyWriteV3],
             (instregex "ADDVv", "SMAXVv", "UMAXVv", "SMINVv", "UMINVv")>;

def : InstRW<[CyWriteV3], (instregex "SADDLV", "UADDLV")>;

// ADD,SUB are WriteV

// Forward declare.
def CyWriteVABD : SchedWriteRes<[CyUnitV]> { let Latency = 3; }

// Add/Diff and accumulate uses the vector multiply unit.
def CyWriteVAccum : SchedWriteRes<[CyUnitVM]> { let Latency = 3; }
def CyReadVAccum : SchedReadAdvance<
    1, [CyWriteVAccum, CyWriteVADDLP, CyWriteVABD]>;

def : InstRW<[CyWriteVAccum, CyReadVAccum],
             (instregex "SADALP", "UADALP")>;

def : InstRW<[CyWriteVAccum, CyReadVAccum],
             (instregex "SABAv", "UABAv", "SABALv", "UABALv")>;

def : InstRW<[CyWriteV3],
             (instregex "SQADDv", "SQSUBv", "UQADDv", "UQSUBv")>;

def : InstRW<[CyWriteV3], (instregex "SUQADDv", "USQADDv")>;

def : InstRW<[CyWriteV4],
             (instregex "ADDHNv", "RADDHNv", "RSUBHNv", "SUBHNv")>;

// WriteV includes:
// AND,BIC,CMTST,EOR,ORN,ORR
// ADDP
// SHADD,SHSUB,SRHADD,UHADD,UHSUB,URHADD
// SADDL,SSUBL,UADDL,USUBL
// SADDW,SSUBW,UADDW,USUBW

def : InstRW<[CyWriteV3],
             (instregex "CMEQv", "CMGEv", "CMGTv",
                        "CMLEv", "CMLTv",
                        "CMHIv", "CMHSv")>;

def : InstRW<[CyWriteV3],
             (instregex "SMAXv", "SMINv", "UMAXv", "UMINv",
                        "SMAXPv", "SMINPv", "UMAXPv", "UMINPv")>;

def : InstRW<[CyWriteVABD],
             (instregex "SABDv", "UABDv",
                        "SABDLv", "UABDLv")>;

//---
// 7.9.3 Floating Point Arithmetic and Comparisons
//---

// FABS,FNEG are WriteF

def : InstRW<[CyWriteV4], (instrs FADDPv2i32p)>;
def : InstRW<[CyWriteV5], (instrs FADDPv2i64p)>;

def : InstRW<[CyWriteV3],
             (instregex "FMAXPv2i", "FMAXNMPv2i",
                        "FMINPv2i", "FMINNMPv2i")>;

def : InstRW<[CyWriteV4],
             (instregex "FMAXVv", "FMAXNMVv", "FMINVv", "FMINNMVv")>;

def : InstRW<[CyWriteV4],
             (instrs FADDSrr, FADDv2f32, FADDv4f32,
                     FSUBSrr, FSUBv2f32, FSUBv4f32,
                     FADDPv2f32, FADDPv4f32,
                     FABD32, FABDv2f32, FABDv4f32)>;
def : InstRW<[CyWriteV5],
             (instrs FADDDrr, FADDv2f64,
                     FSUBDrr, FSUBv2f64,
                     FADDPv2f64,
                     FABD64, FABDv2f64)>;

def : InstRW<[CyWriteV3], (instregex "FCMEQ", "FCMGT", "FCMLE", "FCMLT")>;

def : InstRW<[CyWriteV3],
             (instregex "FACGE", "FACGT",
                        "FMAXS", "FMAXD", "FMAXv",
                        "FMINS", "FMIND", "FMINv",
                        "FMAXNMS", "FMAXNMD", "FMAXNMv",
                        "FMINNMS", "FMINNMD", "FMINNMv",
                        "FMAXPv2f", "FMAXPv4f",
                        "FMINPv2f", "FMINPv4f",
                        "FMAXNMPv2f", "FMAXNMPv4f",
                        "FMINNMPv2f", "FMINNMPv4f")>;

// FCMP,FCMPE,FCCMP,FCCMPE
def : WriteRes<WriteFCmp, [CyUnitVC]> { let Latency = 4; }

// FCSEL is a WriteF.

//---
// 7.9.4 Shifts and Bitfield Operations
//---

// SHL is a WriteV

def CyWriteVSHR : SchedWriteRes<[CyUnitV]> { let Latency = 2; }
def : InstRW<[CyWriteVSHR], (instregex "SSHRv", "USHRv")>;

def CyWriteVSRSHR : SchedWriteRes<[CyUnitV]> { let Latency = 3; }
def : InstRW<[CyWriteVSRSHR], (instregex "SRSHRv", "URSHRv")>;

// Shift and accumulate uses the vector multiply unit.
def CyWriteVShiftAcc : SchedWriteRes<[CyUnitVM]> { let Latency = 3; }
def CyReadVShiftAcc : SchedReadAdvance<
    1, [CyWriteVShiftAcc, CyWriteVSHR, CyWriteVSRSHR]>;
def : InstRW<[CyWriteVShiftAcc, CyReadVShiftAcc],
             (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>;

// SSHL,USHL are WriteV.

def : InstRW<[CyWriteV3], (instregex "SRSHLv", "URSHLv")>;

// SQSHL,SQSHLU,UQSHL are WriteV.

def : InstRW<[CyWriteV3], (instregex "SQRSHLv", "UQRSHLv")>;

// WriteV includes:
// SHLL,SSHLL,USHLL
// SLI,SRI
// BIF,BIT,BSL,BSP
// EXT
// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN
// XTN2

def : InstRW<[CyWriteV4],
             (instregex "RSHRNv", "SHRNv",
                        "SQRSHRNv", "SQRSHRUNv", "SQSHRNv", "SQSHRUNv",
                        "UQRSHRNv", "UQSHRNv", "SQXTNv", "SQXTUNv", "UQXTNv")>;

//---
// 7.9.5 Multiplication
//---

def CyWriteVMul : SchedWriteRes<[CyUnitVM]> {
  let Latency = 4;
}
def : InstRW<[CyWriteVMul],
             (instregex "MULv", "SMULLv", "UMULLv",
                        "SQDMULLv", "SQDMULHv", "SQRDMULHv")>;

// FMUL,FMULX,FNMUL default to WriteFMul.
def : WriteRes<WriteFMul, [CyUnitVM]> {
  let Latency = 4;
}

// 64-bit element multiplies get one extra cycle of latency.
def CyWriteV64Mul : SchedWriteRes<[CyUnitVM]> {
  let Latency = 5;
}
def : InstRW<[CyWriteV64Mul],
             (instrs FMULDrr, FMULv2f64, FMULv2i64_indexed,
                     FNMULDrr, FMULX64, FMULXv2f64, FMULXv2i64_indexed)>;

// Integer multiply-accumulate: the paired read is advanced by one cycle
// relative to the multiply writes listed here.
def CyReadVMulAcc : SchedReadAdvance<1, [CyWriteVMul, CyWriteV64Mul]>;
def : InstRW<[CyWriteVMul, CyReadVMulAcc],
             (instregex "MLA", "MLS", "SMLAL", "SMLSL", "UMLAL", "UMLSL",
                        "SQDMLAL", "SQDMLSL")>;

// Fused FP multiply-add: 8-cycle and 10-cycle writes, each with a read
// advance equal to half of that latency.
def CyWriteSMul : SchedWriteRes<[CyUnitVM]> {
  let Latency = 8;
}
def CyWriteDMul : SchedWriteRes<[CyUnitVM]> {
  let Latency = 10;
}
def CyReadSMul : SchedReadAdvance<4, [CyWriteSMul]>;
def CyReadDMul : SchedReadAdvance<5, [CyWriteDMul]>;

// NOTE(review): this list names no FMLS forms and includes
// FMLAv1i64_indexed, while the CyWriteDMul list below covers both FMLA and
// FMLS -- confirm both lists are as intended.
def : InstRW<[CyWriteSMul, CyReadSMul],
             (instrs FMADDSrrr, FMSUBSrrr, FNMADDSrrr, FNMSUBSrrr,
                     FMLAv2f32, FMLAv4f32,
                     FMLAv1i32_indexed, FMLAv1i64_indexed, FMLAv2i32_indexed)>;
def : InstRW<[CyWriteDMul, CyReadDMul],
             (instrs FMADDDrrr, FMSUBDrrr, FNMADDDrrr, FNMSUBDrrr,
                     FMLAv2f64, FMLAv2i64_indexed,
                     FMLSv2f64, FMLSv2i64_indexed)>;

// Polynomial multiplies are issued to the VD pipe.
def CyWritePMUL : SchedWriteRes<[CyUnitVD]> {
  let Latency = 3;
}
def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>;

//---
// 7.9.6 Divide and Square Root
//---

// FDIV,FSQRT
// TODO: Add 64-bit variant with 19 cycle latency.
// TODO: Specialize FSQRT for longer latency.
def : WriteRes<WriteFDiv, [CyUnitVD, CyUnitFloatDiv]> {
  let Latency         = 17;
  let ReleaseAtCycles = [2, 17];
}

def : InstRW<[CyWriteV4],
             (instregex "FRECPEv", "FRECPXv", "URECPEv", "URSQRTEv")>;

def WriteFRSQRTE : SchedWriteRes<[CyUnitVM]> {
  let Latency = 4;
}
def : InstRW<[WriteFRSQRTE], (instregex "FRSQRTEv")>;

def WriteFRECPS : SchedWriteRes<[CyUnitVM]> {
  let Latency = 8;
}
def WriteFRSQRTS : SchedWriteRes<[CyUnitVM]> {
  let Latency = 10;
}
def : InstRW<[WriteFRECPS],  (instregex "FRECPSv")>;
def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;

//---
// 7.9.7 Integer-FP Conversions
//---

// FCVT lengthen f16/s32
def : InstRW<[WriteVq], (instrs FCVTSHr, FCVTDHr, FCVTDSr)>;

// FCVT,FCVTN,FCVTXN
// SCVTF,UCVTF V,V
// FRINT(AIMNPXZ) V,V
def : WriteRes<WriteFCvt, [CyUnitV]> { let Latency = 4; }

// SCVT/UCVT S/D, Rd = VLD5+V4: 9 cycles.
// Integer-to-FP converts that take a general-purpose source ([SU]CVTF with a
// W/X source and an S/D result) are modelled as a vector-load style transfer
// (WriteVLD) followed by a 4-cycle vector op.
def CyWriteCvtToFPR : WriteSequence<[WriteVLD, CyWriteV4]>;
def : InstRW<[CyWriteCvtToFPR], (instregex "[SU]CVTF[SU][WX][SD]r")>;

// FCVT Rd, S/D = V6+LD4: 10 cycles
// FP-to-integer converts that produce a general-purpose result
// (FCVT{A,M,N,P,Z}{S,U}) are a 6-cycle vector op followed by a load-style
// transfer (WriteLD).
def CyWriteCvtToGPR : WriteSequence<[CyWriteV6, WriteLD]>;
def : InstRW<[CyWriteCvtToGPR], (instregex "FCVT[AMNPZ][SU][SU][WX][SD]r")>;

// FCVTL is a WriteV

//---
// 7.9.8-7.9.10 Cryptography, Data Transposition, Table Lookup
//---

// Crypto operations on CyUnitVD, grouped by latency (2, 3 and 6 cycles).
def CyWriteCrypto2 : SchedWriteRes<[CyUnitVD]> {let Latency = 2;}
def : InstRW<[CyWriteCrypto2], (instrs AESIMCrr, AESMCrr, SHA1Hrr,
                                       AESDrr, AESErr, SHA1SU1rr, SHA256SU0rr,
                                       SHA1SU0rrr)>;

def CyWriteCrypto3 : SchedWriteRes<[CyUnitVD]> {let Latency = 3;}
def : InstRW<[CyWriteCrypto3], (instrs SHA256SU1rrr)>;

def CyWriteCrypto6 : SchedWriteRes<[CyUnitVD]> {let Latency = 6;}
def : InstRW<[CyWriteCrypto6], (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr,
                                       SHA256Hrrr,SHA256H2rrr)>;

// TRN,UZP,ZIP are WriteV.

// TBL,TBX are WriteV.

//---
// 7.9.11-7.9.14 Load/Store, single element and paired
//---

// Loading into the vector unit takes 5 cycles vs 4 for integer loads.
def : WriteRes<WriteVLD, [CyUnitLS]> { let Latency = 5; }

// Store-load forwarding is 4 cycles.
def : WriteRes<WriteVST, [CyUnitLS]> { let Latency = 4; }

// WriteVLDPair/VSTPair sequences are expanded by the target description.

//---
// 7.9.15 Load, element operations
//---

// Only the first WriteVLD, and the WriteAdr for writeback, match def operands.
// Any further WriteVLDs just consume resources. Since all loaded values have
// the same latency, this is acceptable.

// Vd is read 5 cycles after issuing the vector load.
def : ReadAdvance<ReadVLD, 5>;

// LD1Onev*: a single WriteVLD; the _POST forms add WriteAdr for the
// written-back address.
def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLD, WriteAdr],
             (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;

// Register writes from the load's high half are fused micro-ops.
def : InstRW<[WriteVLD], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVLD, WriteVLD], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
             (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;

// LD1Threev*: two WriteVLDs for the 8b/4h/2s/1d arrangements, three for
// 16b/8h/4s/2d. Each _POST form inserts WriteAdr as its second write.
def : InstRW<[WriteVLD, WriteVLD], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
             (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVLD, WriteVLD, WriteVLD],
             (instregex "LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD],
             (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;

// LD1Fourv*: two WriteVLDs for the 8b/4h/2s/1d arrangements, four for
// 16b/8h/4s/2d.
def : InstRW<[WriteVLD, WriteVLD], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
             (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVLD, WriteVLD, WriteVLD, WriteVLD],
             (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD, WriteVLD],
             (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;

// LD1i8/16/32: one WriteVLDShuffle, plus a ReadVLD on a source operand.
def : InstRW<[WriteVLDShuffle, ReadVLD], (instregex "LD1i(8|16|32)$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],
             (instregex "LD1i(8|16|32)_POST")>;

// Convention for the load entries below: every _POST (writeback) entry is its
// non-writeback counterpart with WriteAdr inserted as the second SchedWrite.

// LD1i64 and LD1Rv* forms.
def : InstRW<[WriteVLDShuffle, ReadVLD], (instrs LD1i64)>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],(instrs LD1i64_POST)>;

def : InstRW<[WriteVLDShuffle],
             (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr],
             (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// LD2Twov*, LD2i* and LD2Rv* forms.
def : InstRW<[WriteVLDShuffle, WriteVq],
             (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
             (instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
             (instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
             (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;

def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
             (instregex "LD2i(8|16|32)$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
             (instregex "LD2i(8|16|32)_POST")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
             (instregex "LD2i64$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
             (instregex "LD2i64_POST")>;

def : InstRW<[WriteVLDShuffle, WriteVq],
             (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
             (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;

// LD3Threev*, LD3i* and LD3Rv* forms.
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
             (instregex "LD3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
             (instregex "LD3Threev(8b|4h|2s)_POST")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
             (instregex "LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
             (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;

def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq],
             (instregex "LD3i(8|16|32)$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq],
             (instregex "LD3i(8|16|32)_POST")>;

def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq],
             (instregex "LD3i64$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
             (instregex "LD3i64_POST")>;

def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq],
             (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq],
             (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;

def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
             (instrs LD3Rv1d,LD3Rv2d)>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
             (instrs LD3Rv1d_POST,LD3Rv2d_POST)>;

// LD4Fourv*, LD4i* and LD4Rv* forms.
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
             (instregex "LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
             (instregex "LD4Fourv(8b|4h|2s)_POST")>;
def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
              WriteVLDPairShuffle, WriteVLDPairShuffle],
             (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
              WriteVLDPairShuffle, WriteVLDPairShuffle],
             (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;

def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq, WriteVq],
             (instregex "LD4i(8|16|32)$")>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq, WriteVq],
             (instregex "LD4i(8|16|32)_POST")>;

// The writeback form carries the same writes as LD4i64 (two WriteVLDShuffle
// and two WriteVq) plus WriteAdr, like every other _POST load entry here.
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq, WriteVq],
             (instrs LD4i64)>;
def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq,
              WriteVq],
             (instrs LD4i64_POST)>;

def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq, WriteVq],
             (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq, WriteVq],
             (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;

def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
             (instrs LD4Rv1d,LD4Rv2d)>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
             (instrs LD4Rv1d_POST,LD4Rv2d_POST)>;

//---
// 7.9.16 Store, element operations
//---

// Only the WriteAdr for writeback matches a def operand.
// Subsequent WriteVSTs only consume resources.
// Every _POST store entry is its non-writeback counterpart with WriteAdr
// placed first.

// ST1Onev*, ST1Twov*, ST1Threev*, ST1Fourv* and ST1i* forms.
def : InstRW<[WriteVST],
             (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVST],
             (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;

def : InstRW<[WriteVSTShuffle],
             (instregex "ST1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle],
             (instregex "ST1Twov(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVST, WriteVST],
             (instregex "ST1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVST, WriteVST],
             (instregex "ST1Twov(16b|8h|4s|2d)_POST")>;

def : InstRW<[WriteVSTShuffle, WriteVST],
             (instregex "ST1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVST],
             (instregex "ST1Threev(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVST, WriteVST, WriteVST],
             (instregex "ST1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST],
             (instregex "ST1Threev(16b|8h|4s|2d)_POST")>;

def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
             (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
             (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST],
             (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST, WriteVST],
             (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>;

def : InstRW<[WriteVSTShuffle], (instregex "ST1i(8|16|32)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST1i(8|16|32)_POST")>;

def : InstRW<[WriteVSTShuffle], (instrs ST1i64)>;
def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST1i64_POST)>;

// ST2Twov* and ST2i* forms.
def : InstRW<[WriteVSTShuffle],
             (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle],
             (instregex "ST2Twov(8b|4h|2s)_POST")>;
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
             (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
             (instregex "ST2Twov(16b|8h|4s|2d)_POST")>;

def : InstRW<[WriteVSTShuffle], (instregex "ST2i(8|16|32)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST2i(8|16|32)_POST")>;
def : InstRW<[WriteVSTShuffle], (instrs ST2i64)>;
def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST2i64_POST)>;

// ST3Threev* and ST3i* forms.
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
             (instregex "ST3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
             (instregex "ST3Threev(8b|4h|2s)_POST")>;
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle],
             (instregex "ST3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle],
             (instregex "ST3Threev(16b|8h|4s|2d)_POST")>;

def : InstRW<[WriteVSTShuffle], (instregex "ST3i(8|16|32)$")>;
def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST3i(8|16|32)_POST")>;

def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64)>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64_POST)>;

// ST4Fourv* and ST4i* forms.
def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle],
             (instregex "ST4Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle],
             (instregex "ST4Fourv(8b|4h|2s|1d)_POST")>;
def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle,
              WriteVSTPairShuffle, WriteVSTPairShuffle],
             (instregex "ST4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle,
              WriteVSTPairShuffle, WriteVSTPairShuffle],
             (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>;

def : InstRW<[WriteVSTPairShuffle], (instregex "ST4i(8|16|32)$")>;
def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex "ST4i(8|16|32)_POST")>;

def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>;

// Atomic operations are not supported.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }

//---
// Unused SchedRead types
//---

// These reads get no forwarding advance on this model.
def : ReadAdvance<ReadI, 0>;
def : ReadAdvance<ReadISReg, 0>;
def : ReadAdvance<ReadIEReg, 0>;
def : ReadAdvance<ReadIM, 0>;
def : ReadAdvance<ReadIMA, 0>;
def : ReadAdvance<ReadID, 0>;

} // SchedModel = CycloneModel