xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57.td (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
10b57cec5SDimitry Andric//=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
20b57cec5SDimitry Andric//
30b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric//
70b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric//
90b57cec5SDimitry Andric// This file defines the machine model for ARM Cortex-A57 to support
100b57cec5SDimitry Andric// instruction scheduling and other instruction cost heuristics.
110b57cec5SDimitry Andric//
120b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric
140b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric// *** Common description and scheduling model parameters taken from AArch64 ***
160b57cec5SDimitry Andric// The Cortex-A57 is a traditional superscalar microprocessor with a
170b57cec5SDimitry Andric// conservative 3-wide in-order stage for decode and dispatch. Combined with the
180b57cec5SDimitry Andric// much wider out-of-order issue stage, this produced a need to carefully
190b57cec5SDimitry Andric// schedule micro-ops so that all three decoded each cycle are successfully
200b57cec5SDimitry Andric// issued as the reservation station(s) simply don't stay occupied for long.
210b57cec5SDimitry Andric// Therefore, IssueWidth is set to the narrower of the two at three, while still
220b57cec5SDimitry Andric// modeling the machine as out-of-order.
230b57cec5SDimitry Andric
// Conjunction of IsCPSRDefined and IsPredicated, plus an MCSchedPredicate
// wrapper so that SchedVar entries (see A57WriteMOVsr) can select on it.
24e8d8bef9SDimitry Andricdef IsCPSRDefinedAndPredicated : CheckAll<[IsCPSRDefined, IsPredicated]>;
250b57cec5SDimitry Andricdef IsCPSRDefinedAndPredicatedPred :
26e8d8bef9SDimitry Andric    MCSchedPredicate<IsCPSRDefinedAndPredicated>;
270b57cec5SDimitry Andric
280b57cec5SDimitry Andric// Cortex A57 rev. r1p0 or later (false = r0px)
// Built from FalsePred, so a SchedVar guarded by this predicate (see
// A57WriteMOVT) is never selected and the r0px fallback entry is used.
29e8d8bef9SDimitry Andricdef IsR1P0AndLaterPred : MCSchedPredicate<FalsePred>;
300b57cec5SDimitry Andric
// Addrmode3 offset-register predicates; the three variants differ only in the
// operand index they inspect (2, 3 and 4).
// NOTE(review): each one wraps CheckInvalidRegOperand<n>, which by its name
// matches when operand n is NOT a valid register, while the def names read
// "RegOff" - confirm the intended polarity before relying on these.
31e8d8bef9SDimitry Andricdef IsLdrAm3RegOffPred : MCSchedPredicate<CheckInvalidRegOperand<2>>;
32e8d8bef9SDimitry Andricdef IsLdrAm3RegOffPredX2 : MCSchedPredicate<CheckInvalidRegOperand<3>>;
33e8d8bef9SDimitry Andricdef IsLdrAm3RegOffPredX3 : MCSchedPredicate<CheckInvalidRegOperand<4>>;
340b57cec5SDimitry Andric
350b57cec5SDimitry Andric// If Addrmode3 contains "minus register"
// n is the operand index handed to CheckValidRegOperand; the AM3 opcode check
// (CheckAM3OpSub) is applied to the following operand, n + 1. Both checks must
// hold (CheckAll).
36e8d8bef9SDimitry Andricclass Am3NegativeRegOffset<int n> : MCSchedPredicate<CheckAll<[
37e8d8bef9SDimitry Andric                                      CheckValidRegOperand<n>,
38e8d8bef9SDimitry Andric                                      CheckAM3OpSub<!add(n, 1)>]>>;
39e8d8bef9SDimitry Andric
// Instantiations for operand index 2, 3 and 4.
40e8d8bef9SDimitry Andricdef IsLdrAm3NegRegOffPred : Am3NegativeRegOffset<2>;
41e8d8bef9SDimitry Andricdef IsLdrAm3NegRegOffPredX2 : Am3NegativeRegOffset<3>;
42e8d8bef9SDimitry Andricdef IsLdrAm3NegRegOffPredX3 : Am3NegativeRegOffset<4>;
430b57cec5SDimitry Andric
440b57cec5SDimitry Andric// Load, scaled register offset, not plus LSL2
// Negation of: (CheckAM2NoShift) OR (CheckAM2OpAdd AND CheckAM2ShiftLSL AND
// CheckAM2Offset<n, 2>). All CheckAM2* helpers are applied to the same operand
// index n.
45e8d8bef9SDimitry Andricclass ScaledRegNotPlusLsl2<int n> : CheckNot<
46e8d8bef9SDimitry Andric                                      CheckAny<[
47e8d8bef9SDimitry Andric                                        CheckAM2NoShift<n>,
48e8d8bef9SDimitry Andric                                        CheckAll<[
49e8d8bef9SDimitry Andric                                          CheckAM2OpAdd<n>,
50e8d8bef9SDimitry Andric                                          CheckAM2ShiftLSL<n>,
51e8d8bef9SDimitry Andric                                          CheckAM2Offset<n, 2>
52e8d8bef9SDimitry Andric                                        ]>
53e8d8bef9SDimitry Andric                                      ]>
54e8d8bef9SDimitry Andric                                    >;
550b57cec5SDimitry Andric
// MCSchedPredicate wrappers for operand index 2 (X0), 3 (no suffix) and 4 (X2).
56e8d8bef9SDimitry Andricdef IsLdstsoScaledNotOptimalPredX0 : MCSchedPredicate<ScaledRegNotPlusLsl2<2>>;
57e8d8bef9SDimitry Andricdef IsLdstsoScaledNotOptimalPred : MCSchedPredicate<ScaledRegNotPlusLsl2<3>>;
58e8d8bef9SDimitry Andricdef IsLdstsoScaledNotOptimalPredX2 : MCSchedPredicate<ScaledRegNotPlusLsl2<4>>;
590b57cec5SDimitry Andric
// Matches when CheckAM2NoShift fails for operand 4 (used by the LDRT/LDRBT
// post-indexed register forms below).
60e8d8bef9SDimitry Andricdef IsLdstsoScaledPredX2 : MCSchedPredicate<CheckNot<CheckAM2NoShift<4>>>;
610b57cec5SDimitry Andric
// CheckAM2OpSub applied to operand index 2 (X0), 3 (no suffix) and 4 (X2).
62e8d8bef9SDimitry Andricdef IsLdstsoMinusRegPredX0 : MCSchedPredicate<CheckAM2OpSub<2>>;
63e8d8bef9SDimitry Andricdef IsLdstsoMinusRegPred : MCSchedPredicate<CheckAM2OpSub<3>>;
64e8d8bef9SDimitry Andricdef IsLdstsoMinusRegPredX2 : MCSchedPredicate<CheckAM2OpSub<4>>;
650b57cec5SDimitry Andric
// Record type that carries a list of SchedWriteRes in its Writes field.
// SchedModel is declared but left unset (?); presumably it is filled in by an
// enclosing `let SchedModel = ... in` at the point of use - confirm at the
// definition sites, which are outside this part of the file.
660b57cec5SDimitry Andricclass A57WriteLMOpsListType<list<SchedWriteRes> writes> {
670b57cec5SDimitry Andric  list <SchedWriteRes> Writes = writes;
680b57cec5SDimitry Andric  SchedMachineModel SchedModel = ?;
690b57cec5SDimitry Andric}
700b57cec5SDimitry Andric
710b57cec5SDimitry Andric// *** Common description and scheduling model parameters taken from AArch64 ***
720b57cec5SDimitry Andric// (AArch64SchedA57.td)
// Machine model record for the Cortex-A57. The processor resources and the
// InstRW / SchedAlias entries later in this file are bound to it through
// `let SchedModel = CortexA57Model in { ... }`.
730b57cec5SDimitry Andricdef CortexA57Model : SchedMachineModel {
740b57cec5SDimitry Andric  let IssueWidth        =   3; // 3-way decode and dispatch
750b57cec5SDimitry Andric  let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
760b57cec5SDimitry Andric  let LoadLatency       =   4; // Optimistic load latency
770b57cec5SDimitry Andric  let MispredictPenalty =  16; // Fetch + Decode/Rename/Dispatch + Branch
780b57cec5SDimitry Andric
790b57cec5SDimitry Andric  // Enable partial & runtime unrolling.
800b57cec5SDimitry Andric  let LoopMicroOpBufferSize = 16;
  // The model is declared complete; the WriteNoop InstRW tables below exist to
  // satisfy this for pseudos and special instructions.
810b57cec5SDimitry Andric  let CompleteModel = 1;
820b57cec5SDimitry Andric
830b57cec5SDimitry Andric  // FIXME: Remove when all errors have been fixed.
  // With the check disabled, InstRW regexes in this file that match the same
  // instruction are not reported.
840b57cec5SDimitry Andric  let FullInstRWOverlapCheck = 0;
850b57cec5SDimitry Andric
  // Subtarget features this model does not describe.
86*bdd1243dSDimitry Andric  let UnsupportedFeatures = [HasV8_1MMainline, HasMVEInt, HasMVEFloat, IsMClass,
875ffd83dbSDimitry Andric                             HasFPRegsV8_1M, HasFP16FML, HasMatMulInt8, HasBF16];
880b57cec5SDimitry Andric}
890b57cec5SDimitry Andric
900b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
910b57cec5SDimitry Andric// Define each kind of processor resource and number available on Cortex-A57.
920b57cec5SDimitry Andric// Cortex-A57 has 8 pipelines, each with its own 8-entry queue where
930b57cec5SDimitry Andric// micro-ops wait for their operands and then issue out-of-order.
940b57cec5SDimitry Andric
// The ProcResource argument is the number of units of that kind (two for I,
// one for every other type).
950b57cec5SDimitry Andricdef A57UnitB : ProcResource<1>;  // Type B micro-ops
960b57cec5SDimitry Andricdef A57UnitI : ProcResource<2>;  // Type I micro-ops
970b57cec5SDimitry Andricdef A57UnitM : ProcResource<1>;  // Type M micro-ops
980b57cec5SDimitry Andricdef A57UnitL : ProcResource<1>;  // Type L micro-ops
990b57cec5SDimitry Andricdef A57UnitS : ProcResource<1>;  // Type S micro-ops
1000b57cec5SDimitry Andric
1010b57cec5SDimitry Andricdef A57UnitX : ProcResource<1>;  // Type X micro-ops (F1)
1020b57cec5SDimitry Andricdef A57UnitW : ProcResource<1>;  // Type W micro-ops (F0)
1030b57cec5SDimitry Andric
// A57UnitV is a resource group made of A57UnitX and A57UnitW.
1040b57cec5SDimitry Andriclet SchedModel = CortexA57Model in {
1050b57cec5SDimitry Andric  def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>;    // Type V micro-ops
1060b57cec5SDimitry Andric}
1070b57cec5SDimitry Andric
1080b57cec5SDimitry Andriclet SchedModel = CortexA57Model in {
1090b57cec5SDimitry Andric
1100b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
1110b57cec5SDimitry Andric// Define customized scheduler read/write types specific to the Cortex-A57.
1120b57cec5SDimitry Andric
1130b57cec5SDimitry Andricinclude "ARMScheduleA57WriteRes.td"
1140b57cec5SDimitry Andric
1150b57cec5SDimitry Andric// To have "CompleteModel = 1", support of pseudos and special instructions
// All instructions matched by the patterns below are given a single WriteNoop.
// Patterns without a trailing `$` act as prefixes (e.g. "SWP(B)?", "tCPS").
1160b57cec5SDimitry Andricdef : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$",
1170b57cec5SDimitry Andric  "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$",
1180b57cec5SDimitry Andric  "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$",
1190b57cec5SDimitry Andric  "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$",
1200b57cec5SDimitry Andric  "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE",
1210b57cec5SDimitry Andric  "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "(t2|t)?UDF$", "t2DCPS", "t2SG",
122*bdd1243dSDimitry Andric  "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "t__brkdiv0")>;
1230b57cec5SDimitry Andric
// VFP status/system register moves are likewise modelled as WriteNoop.
1240b57cec5SDimitry Andricdef : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>;
1250b57cec5SDimitry Andric
1260b57cec5SDimitry Andric// Specific memory instrs
// Two WriteNoop entries are listed for each of these instructions.
1270b57cec5SDimitry Andricdef : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC",
1280b57cec5SDimitry Andric  "(t2)?STL", "(t2)?LDREX", "(t2)?STREX", "MEMCPY")>;
1290b57cec5SDimitry Andric
1300b57cec5SDimitry Andric// coprocessor moves
// Every pattern here is end-anchored, so only the exact opcode names listed in
// the alternations are covered.
1310b57cec5SDimitry Andricdef : InstRW<[WriteNoop, WriteNoop], (instregex
1320b57cec5SDimitry Andric  "(t2)?MCR(2|R|R2)?$", "(t2)?MRC(2)?$",
1330b57cec5SDimitry Andric  "(t2)?MRRC(2)?$", "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$",
1340b57cec5SDimitry Andric  "(t2)?MSR(banked|i|_AR|_M)?$")>;
1350b57cec5SDimitry Andric
1360b57cec5SDimitry Andric// Deprecated instructions
1370b57cec5SDimitry Andricdef : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;
1380b57cec5SDimitry Andric
1390b57cec5SDimitry Andric// Pseudos
// Includes the VLDn/VSTn "...Asm" opcodes (lane, dup and write-back variants)
// and the Windows stack-probe / divide-by-zero check pseudos.
1400b57cec5SDimitry Andricdef : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
1410b57cec5SDimitry Andric  "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
1420b57cec5SDimitry Andric  "tLDRpci_pic", "(t2)?SUBS_PC_LR",
1430b57cec5SDimitry Andric  "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
1440b57cec5SDimitry Andric  "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
1450b57cec5SDimitry Andric  "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
1460b57cec5SDimitry Andric  "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
1470b57cec5SDimitry Andric  "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
1480b57cec5SDimitry Andric  "WIN__CHKSTK", "WIN__DBZCHK")>;
1490b57cec5SDimitry Andric
1500b57cec5SDimitry Andric// Miscellaneous
1510b57cec5SDimitry Andric// -----------------------------------------------------------------------------
1520b57cec5SDimitry Andric
// The generic COPY instruction is bound by exact name (instrs, not instregex)
// to A57Write_1cyc_1I.
1530b57cec5SDimitry Andricdef : InstRW<[A57Write_1cyc_1I], (instrs COPY)>;
1540b57cec5SDimitry Andric
1550b57cec5SDimitry Andric// --- 3.2 Branch Instructions ---
1560b57cec5SDimitry Andric// B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ
1570b57cec5SDimitry Andric
// Plain branches, tail jumps and compare-and-branch: A57Write_1cyc_1B.
1580b57cec5SDimitry Andricdef : InstRW<[A57Write_1cyc_1B], (instregex "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$",
1590b57cec5SDimitry Andric  "t?TAILJMP(d|r)", "TCRETURN(d|r)i", "tBfar", "tCBN?Z")>;
// Branch-and-link forms: A57Write_1cyc_1B_1I.
1600b57cec5SDimitry Andricdef : InstRW<[A57Write_1cyc_1B_1I],
1610b57cec5SDimitry Andric  (instregex "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>;
// NOTE(review): the unanchored "BLX" below also prefix-matches BLXi, which the
// previous entry already covers via "t?BLXi"; the overlap is not diagnosed
// because FullInstRWOverlapCheck is 0.
1620b57cec5SDimitry Andricdef : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>;
1630b57cec5SDimitry Andric// Pseudos
1640b57cec5SDimitry Andricdef : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>;
// Jump-table and indirect branches.
1650b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1B_1I], (instregex "BR_JTadd", "t?BR_JTr",
1660b57cec5SDimitry Andric  "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$", "tBRIND")>;
// BR_JTm is the only entry here that names a load unit (1L) in its write.
1670b57cec5SDimitry Andricdef : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;
1680b57cec5SDimitry Andric
1690b57cec5SDimitry Andric// --- 3.3 Arithmetic and Logical Instructions ---
1700b57cec5SDimitry Andric// ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S},
1710b57cec5SDimitry Andric// RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST
1720b57cec5SDimitry Andric
1730b57cec5SDimitry Andricdef : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;
1740b57cec5SDimitry Andric
175e8d8bef9SDimitry Andric// Check branch forms of ALU ops:
176e8d8bef9SDimitry Andric// check reg 0 for ARM_AM::PC
177e8d8bef9SDimitry Andric// if so adds 2 cyc to latency, 1 uop, 1 res cycle for A57UnitB
// non_br is the write used for the ordinary (non-branch) form.
178e8d8bef9SDimitry Andricclass A57BranchForm<SchedWriteRes non_br> :
179e8d8bef9SDimitry Andric  BranchWriteRes<2, 1, [A57UnitB], [1], non_br>;
180e8d8bef9SDimitry Andric
1810b57cec5SDimitry Andric// shift by register, conditional or unconditional
1820b57cec5SDimitry Andric// TODO: according to the doc, conditional uses I0/I1, unconditional uses M.
1830b57cec5SDimitry Andric// Why does the more complex instruction use the simpler pipeline?
1840b57cec5SDimitry Andric// May be an error in the doc.
// Predicated forms select the 1I write, all others the 1M write; both are
// wrapped in the branch-form check on operand 0.
1850b57cec5SDimitry Andricdef A57WriteALUsr : SchedWriteVariant<[
186e8d8bef9SDimitry Andric  SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
187e8d8bef9SDimitry Andric  SchedVar<NoSchedPred,      [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
1880b57cec5SDimitry Andric]>;
// Same variant table as A57WriteALUsr, defined separately for the WriteALUSsr
// alias below.
1890b57cec5SDimitry Andricdef A57WriteALUSsr : SchedWriteVariant<[
190e8d8bef9SDimitry Andric  SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
191e8d8bef9SDimitry Andric  SchedVar<NoSchedPred,      [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
1920b57cec5SDimitry Andric]>;
// Both read variants resolve to ReadDefault.
1930b57cec5SDimitry Andricdef A57ReadALUsr : SchedReadVariant<[
1940b57cec5SDimitry Andric  SchedVar<IsPredicatedPred, [ReadDefault]>,
1950b57cec5SDimitry Andric  SchedVar<NoSchedPred,      [ReadDefault]>
1960b57cec5SDimitry Andric]>;
// Bind the generic ARM ALU shift read/write types to the A57 definitions.
197e8d8bef9SDimitry Andricdef : SchedAlias<WriteALUsi,  CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>;
1980b57cec5SDimitry Andricdef : SchedAlias<WriteALUsr,  A57WriteALUsr>;
1990b57cec5SDimitry Andricdef : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
2000b57cec5SDimitry Andricdef : SchedAlias<ReadALUsr,   A57ReadALUsr>;
2010b57cec5SDimitry Andric
// Compare with register-shifted operand: predicated forms use the 1I write,
// all others the 1M write (no branch-form wrapper, unlike the ALU variants).
2020b57cec5SDimitry Andricdef A57WriteCMPsr : SchedWriteVariant<[
2030b57cec5SDimitry Andric  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
2040b57cec5SDimitry Andric  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
2050b57cec5SDimitry Andric]>;
// Bind the generic compare write types to the A57 definitions.
2060b57cec5SDimitry Andricdef : SchedAlias<WriteCMP,   A57Write_1cyc_1I>;
2070b57cec5SDimitry Andricdef : SchedAlias<WriteCMPsi, A57Write_2cyc_1M>;
2080b57cec5SDimitry Andricdef : SchedAlias<WriteCMPsr, A57WriteCMPsr>;
2090b57cec5SDimitry Andric
2100b57cec5SDimitry Andric// --- 3.4 Move and Shift Instructions ---
2110b57cec5SDimitry Andric// Move, basic
2120b57cec5SDimitry Andric// MOV{S}, MOVW, MVN{S}
2130b57cec5SDimitry Andricdef : InstRW<[A57Write_1cyc_1I], (instregex "MOV(r|i|i16|r_TC)",
2140b57cec5SDimitry Andric  "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL",
2150b57cec5SDimitry Andric  "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>;
2160b57cec5SDimitry Andric
2170b57cec5SDimitry Andric// Move, shift by immed, setflags/no setflags
2180b57cec5SDimitry Andric// (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN
2190b57cec5SDimitry Andric// setflags = isCPSRDefined
// Flag-setting forms use the 2-cycle 1M write, everything else the 1-cycle 1I
// write.
2200b57cec5SDimitry Andricdef A57WriteMOVsi : SchedWriteVariant<[
2210b57cec5SDimitry Andric  SchedVar<IsCPSRDefinedPred,              [A57Write_2cyc_1M]>,
2220b57cec5SDimitry Andric  SchedVar<NoSchedPred,                    [A57Write_1cyc_1I]>
2230b57cec5SDimitry Andric]>;
2240b57cec5SDimitry Andricdef : InstRW<[A57WriteMOVsi], (instregex "MOV(CC)?si", "MVNsi",
2250b57cec5SDimitry Andric  "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi", "(t2|t)LSLri", "RORi",
2260b57cec5SDimitry Andric  "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>;
2270b57cec5SDimitry Andric
2280b57cec5SDimitry Andric// shift by register, conditional or unconditional, setflags/no setflags
// Entries are listed most-specific first: the combined "sets flags AND
// predicated" case precedes the two single-condition cases, with NoSchedPred
// as the fallback.
2290b57cec5SDimitry Andricdef A57WriteMOVsr : SchedWriteVariant<[
2300b57cec5SDimitry Andric  SchedVar<IsCPSRDefinedAndPredicatedPred, [A57Write_2cyc_1I]>,
2310b57cec5SDimitry Andric  SchedVar<IsCPSRDefinedPred,              [A57Write_2cyc_1M]>,
2320b57cec5SDimitry Andric  SchedVar<IsPredicatedPred,               [A57Write_2cyc_1I]>,
2330b57cec5SDimitry Andric  SchedVar<NoSchedPred,                    [A57Write_1cyc_1I]>
2340b57cec5SDimitry Andric]>;
2350b57cec5SDimitry Andricdef : InstRW<[A57WriteMOVsr], (instregex "MOV(CC)?sr", "MVNsr", "t2MVNs",
2360b57cec5SDimitry Andric  "ASRr", "(t2|t)ASRrr", "LSRr", "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr",
2370b57cec5SDimitry Andric  "(t2|t)RORrr")>;
2380b57cec5SDimitry Andric
2390b57cec5SDimitry Andric// Move, top
2400b57cec5SDimitry Andric// MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later
// IsR1P0AndLaterPred is currently defined from FalsePred, so the NoSchedPred
// (r0px) entry is the one that takes effect.
2410b57cec5SDimitry Andricdef A57WriteMOVT : SchedWriteVariant<[
2420b57cec5SDimitry Andric  SchedVar<IsR1P0AndLaterPred,             [A57Write_1cyc_1I]>,
2430b57cec5SDimitry Andric  SchedVar<NoSchedPred,                    [A57Write_2cyc_1M]>
2440b57cec5SDimitry Andric]>;
2450b57cec5SDimitry Andricdef : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>;
2460b57cec5SDimitry Andric
// Write sequences for the MOV_ga_pcrel pseudos. Going by the write names, both
// start with two 1-cycle integer writes; A57WriteI2pc ends with a third integer
// write, A57WriteI2ld with a 4-cycle load write.
2470b57cec5SDimitry Andricdef A57WriteI2pc :
2480b57cec5SDimitry Andric  WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>;
2490b57cec5SDimitry Andricdef A57WriteI2ld :
2500b57cec5SDimitry Andric  WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>;
// instregex patterns match as prefixes, so the first pattern is end-anchored
// with `$`; otherwise it also claims MOV_ga_pcrel_ldr and overlaps the
// A57WriteI2ld entry (an overlap that goes unreported because
// FullInstRWOverlapCheck is 0).
2510b57cec5SDimitry Andricdef : InstRW< [A57WriteI2pc], (instregex "MOV_ga_pcrel$")>;
2520b57cec5SDimitry Andricdef : InstRW< [A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
2530b57cec5SDimitry Andric
2540b57cec5SDimitry Andric// +2cyc for branch forms
// MOVPCLR / MOVPCRX write PC, hence the 3-cycle integer write instead of the
// 1-cycle one used for ordinary moves above.
2550b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>;
2560b57cec5SDimitry Andric
2570b57cec5SDimitry Andric// --- 3.5 Divide and Multiply Instructions ---
2580b57cec5SDimitry Andric// Divide: SDIV, UDIV
2590b57cec5SDimitry Andric// latency from documentation: 4-20 cycles, maximum taken
2600b57cec5SDimitry Andricdef : SchedAlias<WriteDIV, A57Write_20cyc_1M>;
2610b57cec5SDimitry Andric// Multiply: tMul not bound to common WriteRes types
2620b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>;
2630b57cec5SDimitry Andricdef : SchedAlias<WriteMUL16, A57Write_3cyc_1M>;
2640b57cec5SDimitry Andricdef : SchedAlias<WriteMUL32, A57Write_3cyc_1M>;
// Multiply source operands get a read advance of 0 cycles.
2650b57cec5SDimitry Andricdef : ReadAdvance<ReadMUL, 0>;
2660b57cec5SDimitry Andric
2670b57cec5SDimitry Andric// Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB,
2680b57cec5SDimitry Andric// SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R}
2690b57cec5SDimitry Andric// Multiply-accumulate pipelines support late-forwarding of accumulate operands
2700b57cec5SDimitry Andric// from similar μops, allowing a typical sequence of multiply-accumulate μops
2710b57cec5SDimitry Andric// to issue one every 1 cycle (sched advance = 2).
// 3-cycle write on the M unit.
2720b57cec5SDimitry Andricdef A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
// Long multiply-accumulate: flag-setting forms use the 5-cycle 1I+1M write,
// others the 4-cycle 1M write.
273e8d8bef9SDimitry Andricdef A57WriteMLAL : SchedWriteVariant<[
274e8d8bef9SDimitry Andric  SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>,
275e8d8bef9SDimitry Andric  SchedVar<NoSchedPred,       [A57Write_4cyc_1M]>
276e8d8bef9SDimitry Andric]>;
277e8d8bef9SDimitry Andric
// Accumulator read: advanced by 2 cycles, but only when the producer is one of
// the two multiply-accumulate writes listed.
2780b57cec5SDimitry Andricdef A57ReadMLA  : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
2790b57cec5SDimitry Andric
2800b57cec5SDimitry Andricdef : InstRW<[A57WriteMLA],
2810b57cec5SDimitry Andric  (instregex "t2SMLAD", "t2SMLADX", "t2SMLSD", "t2SMLSDX")>;
2820b57cec5SDimitry Andric
// Bind the generic multiply-accumulate read/write types to the A57 definitions.
2830b57cec5SDimitry Andricdef : SchedAlias<WriteMAC16, A57WriteMLA>;
2840b57cec5SDimitry Andricdef : SchedAlias<WriteMAC32, A57WriteMLA>;
2850b57cec5SDimitry Andricdef : SchedAlias<ReadMAC,    A57ReadMLA>;
2860b57cec5SDimitry Andric
2870b57cec5SDimitry Andricdef : SchedAlias<WriteMAC64Lo, A57WriteMLAL>;
2880b57cec5SDimitry Andricdef : SchedAlias<WriteMAC64Hi, A57WriteMLAL>;
2890b57cec5SDimitry Andric
2900b57cec5SDimitry Andric// Multiply long: SMULL, UMULL
2910b57cec5SDimitry Andricdef : SchedAlias<WriteMUL64Lo, A57Write_4cyc_1M>;
2920b57cec5SDimitry Andricdef : SchedAlias<WriteMUL64Hi, A57Write_4cyc_1M>;
2930b57cec5SDimitry Andric
2940b57cec5SDimitry Andric// --- 3.6 Saturating and Parallel Arithmetic Instructions ---
2950b57cec5SDimitry Andric// Parallel arith
2960b57cec5SDimitry Andric// SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8
2970b57cec5SDimitry Andric// Conditional GE-setting instructions require three extra μops
2980b57cec5SDimitry Andric// and two additional cycles to conditionally update the GE field.
// Predicated forms therefore use the 4-cycle write, others the 2-cycle write.
2990b57cec5SDimitry Andricdef A57WriteParArith : SchedWriteVariant<[
3000b57cec5SDimitry Andric  SchedVar<IsPredicatedPred, [A57Write_4cyc_1I_1M]>,
3010b57cec5SDimitry Andric  SchedVar<NoSchedPred,      [A57Write_2cyc_1I_1M]>
3020b57cec5SDimitry Andric]>;
3030b57cec5SDimitry Andricdef : InstRW< [A57WriteParArith], (instregex
3040b57cec5SDimitry Andric  "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)",
3050b57cec5SDimitry Andric  "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>;
3060b57cec5SDimitry Andric
3070b57cec5SDimitry Andric// Parallel arith with exchange: SASX, SSAX, UASX, USAX
// Same predicated/unpredicated split, one cycle longer in each case (5 vs 3).
3080b57cec5SDimitry Andricdef A57WriteParArithExch : SchedWriteVariant<[
3090b57cec5SDimitry Andric  SchedVar<IsPredicatedPred, [A57Write_5cyc_1I_1M]>,
3100b57cec5SDimitry Andric  SchedVar<NoSchedPred,      [A57Write_3cyc_1I_1M]>
3110b57cec5SDimitry Andric]>;
3120b57cec5SDimitry Andricdef : InstRW<[A57WriteParArithExch],
3130b57cec5SDimitry Andric  (instregex "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>;
3140b57cec5SDimitry Andric
3150b57cec5SDimitry Andric// Parallel halving arith
3160b57cec5SDimitry Andric// SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8
3170b57cec5SDimitry Andricdef : InstRW<[A57Write_2cyc_1M], (instregex
3180b57cec5SDimitry Andric  "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)",
3190b57cec5SDimitry Andric  "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>;
3200b57cec5SDimitry Andric
3210b57cec5SDimitry Andric// Parallel halving arith with exchange
3220b57cec5SDimitry Andric// SHASX, SHSAX, UHASX, UHSAX
3230b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?SHASX", "(t2)?SHSAX",
3240b57cec5SDimitry Andric  "(t2)?UHASX", "(t2)?UHSAX")>;
3250b57cec5SDimitry Andric
3260b57cec5SDimitry Andric// Parallel saturating arith
3270b57cec5SDimitry Andric// QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8
// The two trailing t2 patterns are unanchored prefixes, so they cover every
// Thumb2 opcode starting with t2QADD / t2UQADD / t2QSUB / t2UQSUB.
3280b57cec5SDimitry Andricdef : InstRW<[A57Write_2cyc_1M], (instregex "QADD(16|8)", "QSUB(16|8)",
3290b57cec5SDimitry Andric  "UQADD(16|8)", "UQSUB(16|8)", "t2(U?)QADD", "t2(U?)QSUB")>;
3300b57cec5SDimitry Andric
3310b57cec5SDimitry Andric// Parallel saturating arith with exchange
3320b57cec5SDimitry Andric// QASX, QSAX, UQASX, UQSAX
3330b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QASX", "(t2)?QSAX",
3340b57cec5SDimitry Andric  "(t2)?UQASX", "(t2)?UQSAX")>;
3350b57cec5SDimitry Andric
3360b57cec5SDimitry Andric// Saturate: SSAT, SSAT16, USAT, USAT16
3370b57cec5SDimitry Andricdef : InstRW<[A57Write_2cyc_1M],
3380b57cec5SDimitry Andric  (instregex "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>;
3390b57cec5SDimitry Andric
3400b57cec5SDimitry Andric// Saturating arith: QADD, QSUB
// End-anchored so that only the plain ARM-mode QADD / QSUB opcodes match here.
3410b57cec5SDimitry Andricdef : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>;
3420b57cec5SDimitry Andric
3430b57cec5SDimitry Andric// Saturating doubling arith: QDADD, QDSUB
3440b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>;
3450b57cec5SDimitry Andric
3460b57cec5SDimitry Andric// --- 3.7 Miscellaneous Data-Processing Instructions ---
3470b57cec5SDimitry Andric// Bit field extract: SBFX, UBFX
3480b57cec5SDimitry Andricdef : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>;
3490b57cec5SDimitry Andric
3500b57cec5SDimitry Andric// Bit field insert/clear: BFI, BFC
3510b57cec5SDimitry Andricdef : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>;
3520b57cec5SDimitry Andric
3530b57cec5SDimitry Andric// Select bytes, conditional/unconditional
// Predicated SEL uses the 2-cycle write, unpredicated the 1-cycle write.
3540b57cec5SDimitry Andricdef A57WriteSEL : SchedWriteVariant<[
3550b57cec5SDimitry Andric  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
3560b57cec5SDimitry Andric  SchedVar<NoSchedPred,      [A57Write_1cyc_1I]>
3570b57cec5SDimitry Andric]>;
3580b57cec5SDimitry Andricdef : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>;
3590b57cec5SDimitry Andric
3600b57cec5SDimitry Andric// Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH
// End-anchored so the 16-bit-lane (…B16) opcodes are not matched here.
3610b57cec5SDimitry Andricdef : InstRW<[A57Write_1cyc_1I],
3620b57cec5SDimitry Andric  (instregex "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>;
3630b57cec5SDimitry Andric
3640b57cec5SDimitry Andric// Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH
// End-anchored so SXTAB16 / UXTAB16 fall through to the next entry.
3650b57cec5SDimitry Andricdef : InstRW<[A57Write_2cyc_1M],
3660b57cec5SDimitry Andric  (instregex "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>;
3670b57cec5SDimitry Andric
3680b57cec5SDimitry Andric// Sign/zero extend and add, parallel: SXTAB16, UXTAB16
3690b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>;
3700b57cec5SDimitry Andric
3710b57cec5SDimitry Andric// Sum of absolute differences: USAD8, USADA8
3720b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>;
3730b57cec5SDimitry Andric
3740b57cec5SDimitry Andric// --- 3.8 Load Instructions ---
3750b57cec5SDimitry Andric
3760b57cec5SDimitry Andric// Load, immed offset
3770b57cec5SDimitry Andric// LDR and LDRB have LDRi12 and LDRBi12 forms for immediate
3780b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L], (instregex "LDRi12", "LDRBi12",
3790b57cec5SDimitry Andric  "LDRcp", "(t2|t)?LDRConstPool", "LDRLIT_ga_(pcrel|abs)",
3800b57cec5SDimitry Andric  "PICLDR", "tLDR")>;
3810b57cec5SDimitry Andric
// Thumb2 loads without write-back; the pattern is end-anchored, so the
// _PRE / _POST opcodes are handled by their own entries further down.
3820b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L],
3830b57cec5SDimitry Andric  (instregex "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>;
3840b57cec5SDimitry Andric
3850b57cec5SDimitry Andric// For "Load, register offset, minus" we need +1cyc, +1I
3860b57cec5SDimitry Andricdef A57WriteLdrAm3 : SchedWriteVariant<[
3870b57cec5SDimitry Andric  SchedVar<IsLdrAm3NegRegOffPred, [A57Write_5cyc_1I_1L]>,
3880b57cec5SDimitry Andric  SchedVar<NoSchedPred,           [A57Write_4cyc_1L]>
3890b57cec5SDimitry Andric]>;
3900b57cec5SDimitry Andricdef : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>;
// LDRD variant: same rule, using the X2 predicate (operand index 3 instead of
// 2); the write is listed twice, once per write entry of LDRD.
3910b57cec5SDimitry Andricdef A57WriteLdrAm3X2 : SchedWriteVariant<[
3920b57cec5SDimitry Andric  SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_5cyc_1I_1L]>,
3930b57cec5SDimitry Andric  SchedVar<NoSchedPred,             [A57Write_4cyc_1L]>
3940b57cec5SDimitry Andric]>;
3950b57cec5SDimitry Andricdef : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>;
3960b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>;
3970b57cec5SDimitry Andric
// LDR/LDRB with shifted-register offset: the 5-cycle 1I+1L write applies when
// the offset is scaled by anything other than "plus LSL #2" or is subtracted;
// otherwise the plain 4-cycle load write is used.
3980b57cec5SDimitry Andricdef A57WriteLdrAmLDSTSO : SchedWriteVariant<[
3990b57cec5SDimitry Andric  SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_5cyc_1I_1L]>,
4000b57cec5SDimitry Andric  SchedVar<IsLdstsoMinusRegPred,         [A57Write_5cyc_1I_1L]>,
4010b57cec5SDimitry Andric  SchedVar<NoSchedPred,                  [A57Write_4cyc_1L]>
4020b57cec5SDimitry Andric]>;
4030b57cec5SDimitry Andricdef : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>;
4040b57cec5SDimitry Andric
// Write types for the base-register write-back result of indexed loads.
// They name no processor resource ([]) and contribute zero micro-ops; only the
// latency (1, 2 or 3 cycles) differs between them.
4050b57cec5SDimitry Andricdef A57WrBackOne : SchedWriteRes<[]> {
4060b57cec5SDimitry Andric  let Latency = 1;
4070b57cec5SDimitry Andric  let NumMicroOps = 0;
4080b57cec5SDimitry Andric}
4090b57cec5SDimitry Andricdef A57WrBackTwo : SchedWriteRes<[]> {
4100b57cec5SDimitry Andric  let Latency = 2;
4110b57cec5SDimitry Andric  let NumMicroOps = 0;
4120b57cec5SDimitry Andric}
4130b57cec5SDimitry Andricdef A57WrBackThree : SchedWriteRes<[]> {
4140b57cec5SDimitry Andric  let Latency = 3;
4150b57cec5SDimitry Andric  let NumMicroOps = 0;
4160b57cec5SDimitry Andric}
4170b57cec5SDimitry Andric
4180b57cec5SDimitry Andric// --- LDR pre-indexed ---
4190b57cec5SDimitry Andric// Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update)
// In each InstRW below the leading write(s) describe the loaded value(s) and
// the final A57WrBack* write describes the base update.
4200b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR_PRE_IMM",
4210b57cec5SDimitry Andric  "LDRB_PRE_IMM", "t2LDRB_PRE")>;
4220b57cec5SDimitry Andric
4230b57cec5SDimitry Andric// Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update)
4240b57cec5SDimitry Andric// (5 cyc load result for not-lsl2 scaled)
4250b57cec5SDimitry Andricdef A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[
4260b57cec5SDimitry Andric  SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_5cyc_1I_1L]>,
4270b57cec5SDimitry Andric  SchedVar<NoSchedPred,                    [A57Write_4cyc_1L_1I]>
4280b57cec5SDimitry Andric]>;
4290b57cec5SDimitry Andricdef : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo],
4300b57cec5SDimitry Andric  (instregex "LDR_PRE_REG", "LDRB_PRE_REG")>;
4310b57cec5SDimitry Andric
// Halfword / signed loads, pre-indexed: the load result is always the 4-cycle
// write; only the write-back latency (two vs. one cycle) is selected by the
// predicate.
4320b57cec5SDimitry Andricdef A57WriteLdrAm3PreWrBack : SchedWriteVariant<[
4330b57cec5SDimitry Andric  SchedVar<IsLdrAm3RegOffPredX2, [A57WrBackTwo]>,
4340b57cec5SDimitry Andric  SchedVar<NoSchedPred,          [A57WrBackOne]>
4350b57cec5SDimitry Andric]>;
4360b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack],
4370b57cec5SDimitry Andric  (instregex "LDR(H|SH|SB)_PRE")>;
4380b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
4390b57cec5SDimitry Andric  (instregex "t2LDR(H|SH|SB)?_PRE")>;
4400b57cec5SDimitry Andric
4410b57cec5SDimitry Andric// LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm.
// The load write is listed twice (one per loaded register); both the load
// write and the write-back write switch on the same X3 predicate.
4420b57cec5SDimitry Andricdef A57WriteLdrDAm3Pre : SchedWriteVariant<[
4430b57cec5SDimitry Andric  SchedVar<IsLdrAm3RegOffPredX3, [A57Write_5cyc_1I_1L]>,
4440b57cec5SDimitry Andric  SchedVar<NoSchedPred,          [A57Write_4cyc_1L_1I]>
4450b57cec5SDimitry Andric]>;
4460b57cec5SDimitry Andricdef A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[
4470b57cec5SDimitry Andric  SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
4480b57cec5SDimitry Andric  SchedVar<NoSchedPred,          [A57WrBackOne]>
4490b57cec5SDimitry Andric]>;
4500b57cec5SDimitry Andricdef : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack],
4510b57cec5SDimitry Andric  (instregex "LDRD_PRE")>;
4520b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
4530b57cec5SDimitry Andric  (instregex "t2LDRD_PRE")>;
4540b57cec5SDimitry Andric
4550b57cec5SDimitry Andric// --- LDR post-indexed ---
4560b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR(T?)_POST_IMM",
4570b57cec5SDimitry Andric  "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>;
4580b57cec5SDimitry Andric
4590b57cec5SDimitry Andricdef A57WriteLdrAm3PostWrBack : SchedWriteVariant<[
4600b57cec5SDimitry Andric  SchedVar<IsLdrAm3RegOffPred, [A57WrBackTwo]>,
4610b57cec5SDimitry Andric  SchedVar<NoSchedPred,        [A57WrBackOne]>
4620b57cec5SDimitry Andric]>;
4630b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack],
4640b57cec5SDimitry Andric  (instregex "LDR(H|SH|SB)_POST")>;
4650b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
4660b57cec5SDimitry Andric  (instregex "t2LDR(H|SH|SB)?_POST")>;
4670b57cec5SDimitry Andric
4680b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG",
4690b57cec5SDimitry Andric  "LDRB_POST_REG", "LDR(B?)T_POST$")>;
4700b57cec5SDimitry Andric
// Data-result write for LDRT/LDRBT register post-indexed: when
// IsLdstsoScaledPredX2 matches, the write also names the M pipe
// (A57Write_4cyc_1I_1L_1M); otherwise it is A57Write_4cyc_1L_1I.
4710b57cec5SDimitry Andricdef A57WriteLdrTRegPost : SchedWriteVariant<[
472e8d8bef9SDimitry Andric  SchedVar<IsLdstsoScaledPredX2, [A57Write_4cyc_1I_1L_1M]>,
4730b57cec5SDimitry Andric  SchedVar<NoSchedPred,        [A57Write_4cyc_1L_1I]>
4740b57cec5SDimitry Andric]>;
// Matching writeback write: A57WrBackThree for the scaled case, A57WrBackTwo
// otherwise (the "(3)" and "(2)" in the comment below).
4750b57cec5SDimitry Andricdef A57WriteLdrTRegPostWrBack : SchedWriteVariant<[
476e8d8bef9SDimitry Andric  SchedVar<IsLdstsoScaledPredX2, [A57WrBackThree]>,
4770b57cec5SDimitry Andric  SchedVar<NoSchedPred,        [A57WrBackTwo]>
4780b57cec5SDimitry Andric]>;
4790b57cec5SDimitry Andric// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L"
4800b57cec5SDimitry Andricdef : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack],
4810b57cec5SDimitry Andric  (instregex "LDRT_POST_REG", "LDRBT_POST_REG")>;
4820b57cec5SDimitry Andric
// Register-offset LDRSBT/LDRHT/LDRSHT: 4-cycle load write with A57WrBackTwo
// writeback.
4830b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>;
4840b57cec5SDimitry Andric
// Writeback write for post-indexed LDRD: A57WrBackTwo when
// IsLdrAm3RegOffPredX3 matches, A57WrBackOne otherwise.
// NOTE(review): the "X3" suffix presumably selects the operand index the
// predicate inspects; the predicate is defined outside this section.
4850b57cec5SDimitry Andricdef A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[
4860b57cec5SDimitry Andric  SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
4870b57cec5SDimitry Andric  SchedVar<NoSchedPred,          [A57WrBackOne]>
4880b57cec5SDimitry Andric]>;
4890b57cec5SDimitry Andric// LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm.
4900b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
4910b57cec5SDimitry Andric  A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>;
// Thumb2 LDRD post-indexed always uses the A57WrBackOne writeback.
4920b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
4930b57cec5SDimitry Andric  (instregex "t2LDRD_POST")>;
4940b57cec5SDimitry Andric
4950b57cec5SDimitry Andric// --- Preload instructions ---
4960b57cec5SDimitry Andric// Preload, immed offset
// All immediate-offset PLD/PLDW forms, the Thumb2 i8/pci/s forms and every
// PLI record are bound to A57Write_4cyc_1L.
4970b57cec5SDimitry Andricdef : InstRW<[A57Write_4cyc_1L], (instregex "(t2)?PLDi12", "(t2)?PLDWi12",
4980b57cec5SDimitry Andric  "t2PLDW?(i8|pci|s)", "(t2)?PLI")>;
4990b57cec5SDimitry Andric
5000b57cec5SDimitry Andric// Preload, register offset:
5010b57cec5SDimitry Andric// 5cyc "I0/I1,L" for minus reg or scaled not plus lsl2
5020b57cec5SDimitry Andric// otherwise 4cyc "L"
// The two 5-cycle cases are tested first; anything they reject falls through
// to the NoSchedPred default.
5030b57cec5SDimitry Andricdef A57WritePLD : SchedWriteVariant<[
5040b57cec5SDimitry Andric  SchedVar<IsLdstsoScaledNotOptimalPredX0, [A57Write_5cyc_1I_1L]>,
5050b57cec5SDimitry Andric  SchedVar<IsLdstsoMinusRegPredX0,         [A57Write_5cyc_1I_1L]>,
5060b57cec5SDimitry Andric  SchedVar<NoSchedPred,                    [A57Write_4cyc_1L]>
5070b57cec5SDimitry Andric]>;
5080b57cec5SDimitry Andricdef : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>;
5090b57cec5SDimitry Andric
5100b57cec5SDimitry Andric// --- Load multiple instructions ---
// For every NumAddr in 1..8 define two operand-count predicates:
//   A57LMAddrPred<N>    matches an instruction with 2*N+2 or 2*N+3 operands;
//   A57LMAddrUpdPred<N> matches an instruction with 2*N+3 or 2*N+4 operands.
// (!shl(NumAddr, 1) is 2*NumAddr.) The load/store-multiple variants below use
// these to choose how many per-register writes to emit.
// NOTE(review): the "+1" in the Upd form presumably accounts for the extra
// writeback operand -- confirm against the instruction operand layouts.
5110b57cec5SDimitry Andricforeach NumAddr = 1-8 in {
512e8d8bef9SDimitry Andric  def A57LMAddrPred#NumAddr : MCSchedPredicate<CheckAny<[
513e8d8bef9SDimitry Andric                                CheckNumOperands<!add(!shl(NumAddr, 1), 2)>,
514e8d8bef9SDimitry Andric                                CheckNumOperands<!add(!shl(NumAddr, 1), 3)>]>>;
515e8d8bef9SDimitry Andric  def A57LMAddrUpdPred#NumAddr : MCSchedPredicate<CheckAny<[
516e8d8bef9SDimitry Andric                                   CheckNumOperands<!add(!shl(NumAddr, 1), 3)>,
517e8d8bef9SDimitry Andric                                   CheckNumOperands<!add(!shl(NumAddr, 1), 4)>]>>;
5180b57cec5SDimitry Andric}
5190b57cec5SDimitry Andric
// Per-register writes for LDM when the base register is not in the register
// list: 16 entries, two per latency step, running from 3 to 10 cycles on L.
5200b57cec5SDimitry Andricdef A57LDMOpsListNoregin : A57WriteLMOpsListType<
5210b57cec5SDimitry Andric                [A57Write_3cyc_1L, A57Write_3cyc_1L,
5220b57cec5SDimitry Andric                 A57Write_4cyc_1L, A57Write_4cyc_1L,
5230b57cec5SDimitry Andric                 A57Write_5cyc_1L, A57Write_5cyc_1L,
5240b57cec5SDimitry Andric                 A57Write_6cyc_1L, A57Write_6cyc_1L,
5250b57cec5SDimitry Andric                 A57Write_7cyc_1L, A57Write_7cyc_1L,
5260b57cec5SDimitry Andric                 A57Write_8cyc_1L, A57Write_8cyc_1L,
5270b57cec5SDimitry Andric                 A57Write_9cyc_1L, A57Write_9cyc_1L,
5280b57cec5SDimitry Andric                 A57Write_10cyc_1L, A57Write_10cyc_1L]>;
// A57LMAddrPred<N> selects the first 2*N writes of the list; the default
// selects all 16. Marked Variadic.
5290b57cec5SDimitry Andricdef A57WriteLDMnoreginlist : SchedWriteVariant<[
5300b57cec5SDimitry Andric  SchedVar<A57LMAddrPred1,     A57LDMOpsListNoregin.Writes[0-1]>,
5310b57cec5SDimitry Andric  SchedVar<A57LMAddrPred2,     A57LDMOpsListNoregin.Writes[0-3]>,
5320b57cec5SDimitry Andric  SchedVar<A57LMAddrPred3,     A57LDMOpsListNoregin.Writes[0-5]>,
5330b57cec5SDimitry Andric  SchedVar<A57LMAddrPred4,     A57LDMOpsListNoregin.Writes[0-7]>,
5340b57cec5SDimitry Andric  SchedVar<A57LMAddrPred5,     A57LDMOpsListNoregin.Writes[0-9]>,
5350b57cec5SDimitry Andric  SchedVar<A57LMAddrPred6,     A57LDMOpsListNoregin.Writes[0-11]>,
5360b57cec5SDimitry Andric  SchedVar<A57LMAddrPred7,     A57LDMOpsListNoregin.Writes[0-13]>,
5370b57cec5SDimitry Andric  SchedVar<A57LMAddrPred8,     A57LDMOpsListNoregin.Writes[0-15]>,
5380b57cec5SDimitry Andric  SchedVar<NoSchedPred,        A57LDMOpsListNoregin.Writes[0-15]>
5390b57cec5SDimitry Andric]> { let Variadic=1; }
5400b57cec5SDimitry Andric
// Per-register writes for LDM when the base register is in the register
// list: same shape as the "noregin" list, but each write is one cycle longer
// (4 to 11 cycles) and also names the I pipe.
5410b57cec5SDimitry Andricdef A57LDMOpsListRegin : A57WriteLMOpsListType<
5420b57cec5SDimitry Andric                [A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
5430b57cec5SDimitry Andric                 A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
5440b57cec5SDimitry Andric                 A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
5450b57cec5SDimitry Andric                 A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
5460b57cec5SDimitry Andric                 A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
5470b57cec5SDimitry Andric                 A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
5480b57cec5SDimitry Andric                 A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
5490b57cec5SDimitry Andric                 A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I]>;
// A57LMAddrPred<N> selects the first 2*N writes; the default selects all 16.
5500b57cec5SDimitry Andricdef A57WriteLDMreginlist : SchedWriteVariant<[
5510b57cec5SDimitry Andric  SchedVar<A57LMAddrPred1,     A57LDMOpsListRegin.Writes[0-1]>,
5520b57cec5SDimitry Andric  SchedVar<A57LMAddrPred2,     A57LDMOpsListRegin.Writes[0-3]>,
5530b57cec5SDimitry Andric  SchedVar<A57LMAddrPred3,     A57LDMOpsListRegin.Writes[0-5]>,
5540b57cec5SDimitry Andric  SchedVar<A57LMAddrPred4,     A57LDMOpsListRegin.Writes[0-7]>,
5550b57cec5SDimitry Andric  SchedVar<A57LMAddrPred5,     A57LDMOpsListRegin.Writes[0-9]>,
5560b57cec5SDimitry Andric  SchedVar<A57LMAddrPred6,     A57LDMOpsListRegin.Writes[0-11]>,
5570b57cec5SDimitry Andric  SchedVar<A57LMAddrPred7,     A57LDMOpsListRegin.Writes[0-13]>,
5580b57cec5SDimitry Andric  SchedVar<A57LMAddrPred8,     A57LDMOpsListRegin.Writes[0-15]>,
5590b57cec5SDimitry Andric  SchedVar<NoSchedPred,        A57LDMOpsListRegin.Writes[0-15]>
5600b57cec5SDimitry Andric]> { let Variadic=1; }
5610b57cec5SDimitry Andric
// Writes for LDM with base writeback: entry 0 is the writeback write
// (A57WrBackOne), followed by 16 per-register writes, two per latency step
// from 3 to 10 cycles.
5620b57cec5SDimitry Andricdef A57LDMOpsList_Upd : A57WriteLMOpsListType<
5630b57cec5SDimitry Andric              [A57WrBackOne,
5640b57cec5SDimitry Andric               A57Write_3cyc_1L_1I, A57Write_3cyc_1L_1I,
5650b57cec5SDimitry Andric               A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
5660b57cec5SDimitry Andric               A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
5670b57cec5SDimitry Andric               A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
5680b57cec5SDimitry Andric               A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
5690b57cec5SDimitry Andric               A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
5700b57cec5SDimitry Andric               A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
5710b57cec5SDimitry Andric               A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>;
// A57LMAddrUpdPred<N> selects the writeback entry plus the first 2*N register
// writes (slice [0 - 2*N]); the default selects all 17 entries.
5720b57cec5SDimitry Andricdef A57WriteLDM_Upd : SchedWriteVariant<[
573e8d8bef9SDimitry Andric  SchedVar<A57LMAddrUpdPred1,     A57LDMOpsList_Upd.Writes[0-2]>,
574e8d8bef9SDimitry Andric  SchedVar<A57LMAddrUpdPred2,     A57LDMOpsList_Upd.Writes[0-4]>,
575e8d8bef9SDimitry Andric  SchedVar<A57LMAddrUpdPred3,     A57LDMOpsList_Upd.Writes[0-6]>,
576e8d8bef9SDimitry Andric  SchedVar<A57LMAddrUpdPred4,     A57LDMOpsList_Upd.Writes[0-8]>,
577e8d8bef9SDimitry Andric  SchedVar<A57LMAddrUpdPred5,     A57LDMOpsList_Upd.Writes[0-10]>,
578e8d8bef9SDimitry Andric  SchedVar<A57LMAddrUpdPred6,     A57LDMOpsList_Upd.Writes[0-12]>,
579e8d8bef9SDimitry Andric  SchedVar<A57LMAddrUpdPred7,     A57LDMOpsList_Upd.Writes[0-14]>,
580e8d8bef9SDimitry Andric  SchedVar<A57LMAddrUpdPred8,     A57LDMOpsList_Upd.Writes[0-16]>,
5810b57cec5SDimitry Andric  SchedVar<NoSchedPred,           A57LDMOpsList_Upd.Writes[0-16]>
5820b57cec5SDimitry Andric]> { let Variadic=1; }
5830b57cec5SDimitry Andric
// Top-level LDM write (no writeback): dispatches to the "reginlist" variant
// when IsLDMBaseRegInListPred matches and to the "noreginlist" variant
// otherwise. Each of those then resolves by operand count.
5840b57cec5SDimitry Andricdef A57WriteLDM : SchedWriteVariant<[
585e8d8bef9SDimitry Andric  SchedVar<IsLDMBaseRegInListPred, [A57WriteLDMreginlist]>,
5860b57cec5SDimitry Andric  SchedVar<NoSchedPred,            [A57WriteLDMnoreginlist]>
5870b57cec5SDimitry Andric]> { let Variadic=1; }
5880b57cec5SDimitry Andric
// The end anchor keeps this regex from also matching the *_UPD / *_RET
// records, which are bound separately.
5890b57cec5SDimitry Andricdef : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>;
5900b57cec5SDimitry Andric
5910b57cec5SDimitry Andric// TODO: no writeback latency defined in documentation (implemented as 1 cyc)
// Writeback LDM forms, LDMIA_RET and tPOP all use A57WriteLDM_Upd, whose first
// write is the writeback entry.
5920b57cec5SDimitry Andricdef : InstRW<[A57WriteLDM_Upd],
5930b57cec5SDimitry Andric  (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>;
5940b57cec5SDimitry Andric
// VLLDM is modelled as a single 5-cycle write on the L pipe.
5950b57cec5SDimitry Andricdef : InstRW<[A57Write_5cyc_1L], (instregex "VLLDM")>;
5960b57cec5SDimitry Andric
5970b57cec5SDimitry Andric// --- 3.9 Store Instructions ---
5980b57cec5SDimitry Andric
5990b57cec5SDimitry Andric// Store, immed offset
// All listed immediate-offset stores (ARM, Thumb2 and Thumb1 "tSTR*") are a
// single A57Write_1cyc_1S write. "tSTR" is unanchored, so it matches every
// record whose name starts with that prefix.
6000b57cec5SDimitry Andricdef : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR",
6010b57cec5SDimitry Andric  "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>;
6020b57cec5SDimitry Andric
6030b57cec5SDimitry Andric// Store, register offset
6040b57cec5SDimitry Andric// For minus or for not plus lsl2 scaled we need 3cyc "I0/I1, S",
6050b57cec5SDimitry Andric// otherwise 1cyc S.
// Both 3-cycle cases are listed before the NoSchedPred default, which supplies
// the 1-cycle write.
6060b57cec5SDimitry Andricdef A57WriteStrAmLDSTSO : SchedWriteVariant<[
6070b57cec5SDimitry Andric  SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_3cyc_1I_1S]>,
6080b57cec5SDimitry Andric  SchedVar<IsLdstsoMinusRegPred,         [A57Write_3cyc_1I_1S]>,
6090b57cec5SDimitry Andric  SchedVar<NoSchedPred,                  [A57Write_1cyc_1S]>
6100b57cec5SDimitry Andric]>;
6110b57cec5SDimitry Andricdef : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>;
6120b57cec5SDimitry Andric
6130b57cec5SDimitry Andric// STRH,STRD: 3cyc "I0/I1, S" for minus reg, 1cyc S for imm or for plus reg.
// STRH uses IsLdrAm3NegRegOffPred; STRD uses the "X2" flavour of the same
// predicate. The selected writes are otherwise identical.
// NOTE(review): the X2 suffix presumably shifts the inspected operand index
// because STRD has one more register operand -- confirm.
6140b57cec5SDimitry Andricdef A57WriteStrAm3 : SchedWriteVariant<[
6150b57cec5SDimitry Andric  SchedVar<IsLdrAm3NegRegOffPred, [A57Write_3cyc_1I_1S]>,
6160b57cec5SDimitry Andric  SchedVar<NoSchedPred,           [A57Write_1cyc_1S]>
6170b57cec5SDimitry Andric]>;
6180b57cec5SDimitry Andricdef : InstRW<[A57WriteStrAm3], (instregex "STRH$")>;
6190b57cec5SDimitry Andricdef A57WriteStrAm3X2 : SchedWriteVariant<[
6200b57cec5SDimitry Andric  SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
6210b57cec5SDimitry Andric  SchedVar<NoSchedPred,             [A57Write_1cyc_1S]>
6220b57cec5SDimitry Andric]>;
6230b57cec5SDimitry Andricdef : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>;
6240b57cec5SDimitry Andric
6250b57cec5SDimitry Andric// Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback)
// For these stores the writeback write is listed first, followed by the store
// write itself.
6260b57cec5SDimitry Andricdef : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR_PRE_IMM",
6270b57cec5SDimitry Andric  "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)",
6280b57cec5SDimitry Andric  "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>;
6290b57cec5SDimitry Andric
6300b57cec5SDimitry Andric// Store, register pre-indexed:
6310b57cec5SDimitry Andric// 1(1) "S, I0/I1" for plus reg
6320b57cec5SDimitry Andric// 3(2) "I0/I1, S" for minus reg
6330b57cec5SDimitry Andric// 1(2) "S, M" for scaled plus lsl2
6340b57cec5SDimitry Andric// 3(2) "I0/I1, S" for other scaled
// Order matters in the store-write variant: the "not optimal scaled" and
// "minus reg" predicates are tested before IsLdstsoScaledPredX2, so the
// "S, M" entry is reached only by scaled forms the first two rejected.
6350b57cec5SDimitry Andricdef A57WriteStrAmLDSTSOPre : SchedWriteVariant<[
6360b57cec5SDimitry Andric  SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_3cyc_1I_1S]>,
6370b57cec5SDimitry Andric  SchedVar<IsLdstsoMinusRegPredX2,         [A57Write_3cyc_1I_1S]>,
6380b57cec5SDimitry Andric  SchedVar<IsLdstsoScaledPredX2,           [A57Write_1cyc_1S_1M]>,
6390b57cec5SDimitry Andric  SchedVar<NoSchedPred,                    [A57Write_1cyc_1S_1I]>
6400b57cec5SDimitry Andric]>;
// Writeback write: A57WrBackTwo for any scaled or minus-register form,
// A57WrBackOne otherwise (the parenthesised numbers in the table above).
6410b57cec5SDimitry Andricdef A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[
6420b57cec5SDimitry Andric  SchedVar<IsLdstsoScaledPredX2,           [A57WrBackTwo]>,
6430b57cec5SDimitry Andric  SchedVar<IsLdstsoMinusRegPredX2,         [A57WrBackTwo]>,
6440b57cec5SDimitry Andric  SchedVar<NoSchedPred,                    [A57WrBackOne]>
6450b57cec5SDimitry Andric]>;
6460b57cec5SDimitry Andricdef : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre],
6470b57cec5SDimitry Andric  (instregex "STR_PRE_REG", "STRB_PRE_REG")>;
6480b57cec5SDimitry Andric
6490b57cec5SDimitry Andric// pre-indexed STRH/STRD (STRH_PRE, STRD_PRE)
6500b57cec5SDimitry Andric// 1(1) "S, I0/I1" for imm or reg plus
6510b57cec5SDimitry Andric// 3(2) "I0/I1, S" for reg minus
// STRH_PRE uses the "X2" predicate flavour and STRD_PRE the "X3" flavour; the
// two pairs of variants are otherwise identical. In each InstRW the writeback
// write comes first and the store write second.
6520b57cec5SDimitry Andricdef A57WriteStrAm3PreX2 : SchedWriteVariant<[
6530b57cec5SDimitry Andric  SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
6540b57cec5SDimitry Andric  SchedVar<NoSchedPred,             [A57Write_1cyc_1S_1I]>
6550b57cec5SDimitry Andric]>;
6560b57cec5SDimitry Andricdef A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[
6570b57cec5SDimitry Andric  SchedVar<IsLdrAm3NegRegOffPredX2, [A57WrBackTwo]>,
6580b57cec5SDimitry Andric  SchedVar<NoSchedPred,             [A57WrBackOne]>
6590b57cec5SDimitry Andric]>;
6600b57cec5SDimitry Andricdef : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2],
6610b57cec5SDimitry Andric  (instregex "STRH_PRE")>;
6620b57cec5SDimitry Andric
6630b57cec5SDimitry Andricdef A57WriteStrAm3PreX3 : SchedWriteVariant<[
6640b57cec5SDimitry Andric  SchedVar<IsLdrAm3NegRegOffPredX3, [A57Write_3cyc_1I_1S]>,
6650b57cec5SDimitry Andric  SchedVar<NoSchedPred,             [A57Write_1cyc_1S_1I]>
6660b57cec5SDimitry Andric]>;
6670b57cec5SDimitry Andricdef A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[
6680b57cec5SDimitry Andric  SchedVar<IsLdrAm3NegRegOffPredX3, [A57WrBackTwo]>,
6690b57cec5SDimitry Andric  SchedVar<NoSchedPred,             [A57WrBackOne]>
6700b57cec5SDimitry Andric]>;
6710b57cec5SDimitry Andricdef : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3],
6720b57cec5SDimitry Andric  (instregex "STRD_PRE")>;
6730b57cec5SDimitry Andric
// Store, immediate post-indexed: A57WrBackOne writeback, then a 1-cycle
// "S, I0/I1" store write.
6740b57cec5SDimitry Andricdef : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR(T?)_POST_IMM",
6750b57cec5SDimitry Andric  "STRB(T?)_POST_IMM", "t2STR(B?)_POST")>;
6760b57cec5SDimitry Andric
6770b57cec5SDimitry Andric// 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not)
6780b57cec5SDimitry Andricdef : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex "STR(T?)_POST_REG",
6790b57cec5SDimitry Andric  "STRB(T?)_POST_REG", "STR(B?)T_POST$")>;
6800b57cec5SDimitry Andric
6810b57cec5SDimitry Andric// post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr
6820b57cec5SDimitry Andric// 1(1) "S, I0/I1" both for reg or imm
6830b57cec5SDimitry Andricdef : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
6840b57cec5SDimitry Andric  (instregex "(t2)?STR(H|D)_POST", "STRHT(i|r)", "t2STRHT")>;
6850b57cec5SDimitry Andric
6860b57cec5SDimitry Andric// --- Store multiple instructions ---
6870b57cec5SDimitry Andric// TODO: no writeback latency defined in documentation
// STM without writeback: A57LMAddrPred<N> selects an N-cycle write on the S
// pipe; instructions matching none of the eight predicates get 2 cycles.
6880b57cec5SDimitry Andricdef A57WriteSTM : SchedWriteVariant<[
6890b57cec5SDimitry Andric    SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
6900b57cec5SDimitry Andric    SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
6910b57cec5SDimitry Andric    SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
6920b57cec5SDimitry Andric    SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
6930b57cec5SDimitry Andric    SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
6940b57cec5SDimitry Andric    SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
6950b57cec5SDimitry Andric    SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
6960b57cec5SDimitry Andric    SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
6970b57cec5SDimitry Andric    SchedVar<NoSchedPred,    [A57Write_2cyc_1S]>
6980b57cec5SDimitry Andric]>;
// STM with writeback: same latency ladder, with the I pipe added to each
// write.
// NOTE(review): this variant keys on A57LMAddrPred<N>, whereas the LDM
// writeback variant (A57WriteLDM_Upd) keys on A57LMAddrUpdPred<N>. Confirm
// against the STM*_UPD operand layout that the non-Upd predicate is intended.
6990b57cec5SDimitry Andricdef A57WriteSTM_Upd : SchedWriteVariant<[
7000b57cec5SDimitry Andric    SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
7010b57cec5SDimitry Andric    SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
7020b57cec5SDimitry Andric    SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
7030b57cec5SDimitry Andric    SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
7040b57cec5SDimitry Andric    SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
7050b57cec5SDimitry Andric    SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
7060b57cec5SDimitry Andric    SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
7070b57cec5SDimitry Andric    SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
7080b57cec5SDimitry Andric    SchedVar<NoSchedPred,    [A57Write_2cyc_1S_1I]>
7090b57cec5SDimitry Andric]>;
7100b57cec5SDimitry Andric
// Non-writeback forms are matched with an end anchor; writeback forms and
// tPUSH list the A57WrBackOne writeback write first.
7110b57cec5SDimitry Andricdef : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>;
7120b57cec5SDimitry Andricdef : InstRW<[A57WrBackOne, A57WriteSTM_Upd],
7130b57cec5SDimitry Andric  (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>;
7140b57cec5SDimitry Andric
// VLSTM is modelled as a single 5-cycle write on the S pipe.
7150b57cec5SDimitry Andricdef : InstRW<[A57Write_5cyc_1S], (instregex "VLSTM")>;
7160b57cec5SDimitry Andric
7170b57cec5SDimitry Andric// --- 3.10 FP Data Processing Instructions ---
// The generic 32- and 64-bit FP ALU write classes both map to a 5-cycle write
// on the V unit.
7180b57cec5SDimitry Andricdef : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>;
7190b57cec5SDimitry Andricdef : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>;
7200b57cec5SDimitry Andric
// VABS (single, double, half) overrides the alias with a 3-cycle write.
7210b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>;
7220b57cec5SDimitry Andric
7230b57cec5SDimitry Andric// fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional
// IsPredicatedPred selects the 6-cycle V+X write; everything else gets the
// 3-cycle X write.
7240b57cec5SDimitry Andricdef A57WriteVcmp : SchedWriteVariant<[
7250b57cec5SDimitry Andric  SchedVar<IsPredicatedPred, [A57Write_6cyc_1V_1X]>,
7260b57cec5SDimitry Andric  SchedVar<NoSchedPred,      [A57Write_3cyc_1X]>
7270b57cec5SDimitry Andric]>;
7280b57cec5SDimitry Andricdef : InstRW<[A57WriteVcmp],
7290b57cec5SDimitry Andric  (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>;
7300b57cec5SDimitry Andric
7310b57cec5SDimitry Andric// fp convert
// Every conversion bound here, as well as the generic WriteFPCVT class, is a
// 5-cycle write on the V unit.
7320b57cec5SDimitry Andricdef : InstRW<[A57Write_5cyc_1V], (instregex
7330b57cec5SDimitry Andric  "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>;
7340b57cec5SDimitry Andricdef : InstRW<[A57Write_5cyc_1V], (instregex "VTOSLS", "VTOUHS", "VTOULS")>;
7350b57cec5SDimitry Andricdef : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>;
7360b57cec5SDimitry Andric
7370b57cec5SDimitry Andricdef : InstRW<[A57Write_5cyc_1V], (instregex "VJCVT")>;
7380b57cec5SDimitry Andric
7390b57cec5SDimitry Andric// FP round to integral
7400b57cec5SDimitry Andricdef : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>;
7410b57cec5SDimitry Andric
7420b57cec5SDimitry Andric// FP divide, FP square root
// 32-bit divide and square root use the 17-cycle W write, 64-bit use the
// 32-cycle W write. Half-precision VSQRTH is bound explicitly to the 17-cycle
// write.
7430b57cec5SDimitry Andricdef : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>;
7440b57cec5SDimitry Andricdef : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>;
7450b57cec5SDimitry Andricdef : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>;
7460b57cec5SDimitry Andricdef : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>;
7470b57cec5SDimitry Andric
7480b57cec5SDimitry Andricdef : InstRW<[A57Write_17cyc_1W], (instregex "VSQRTH")>;
7490b57cec5SDimitry Andric
7500b57cec5SDimitry Andric// FP max/min
// Both regexes are unanchored prefixes, so they match every record whose name
// starts with VMAX or VMIN.
7510b57cec5SDimitry Andricdef : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>;
7520b57cec5SDimitry Andric
7530b57cec5SDimitry Andric// FP multiply-accumulate pipelines support late forwarding of the result
7540b57cec5SDimitry Andric// from FP multiply μops to the accumulate operands of an
7550b57cec5SDimitry Andric// FP multiply-accumulate μop. The latter can potentially be issued 1 cycle
7560b57cec5SDimitry Andric// after the FP multiply μop has been issued.
7570b57cec5SDimitry Andric// FP multiply, FZ
// A57WriteVMUL is a named 5-cycle write on the V unit. It is declared as its
// own record (rather than reusing A57Write_5cyc_1V) so that the read-advance
// defined for multiply-accumulate can list it as a producer.
7580b57cec5SDimitry Andricdef A57WriteVMUL : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
7590b57cec5SDimitry Andric
7600b57cec5SDimitry Andricdef : SchedAlias<WriteFPMUL32, A57WriteVMUL>;
7610b57cec5SDimitry Andricdef : SchedAlias<WriteFPMUL64, A57WriteVMUL>;
// Multiply source operands get no read advance.
7620b57cec5SDimitry Andricdef : ReadAdvance<ReadFPMUL, 0>;
7630b57cec5SDimitry Andric
7640b57cec5SDimitry Andric// FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate
7650b57cec5SDimitry Andric// VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS
7660b57cec5SDimitry Andricdef A57WriteVFMA : SchedWriteRes<[A57UnitV]> { let Latency = 9;  }
7670b57cec5SDimitry Andric
7680b57cec5SDimitry Andric// VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.)
7690b57cec5SDimitry Andric// VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.)
7700b57cec5SDimitry Andric// Currently, there is no way to define different read advances for a VFMA
7710b57cec5SDimitry Andric// operand coming from VFMA or from VMUL, so a read advance of 5 is used for both.
7720b57cec5SDimitry Andric// Zero latency (instead of one) for VMUL->VFMA shouldn't break anything.
7730b57cec5SDimitry Andric// The same situation applies to the ASIMD VMUL/VFMA instructions.
7740b57cec5SDimitry Andric// def A57ReadVFMA : SchedRead;
7750b57cec5SDimitry Andric// def : ReadAdvance<A57ReadVFMA, 5, [A57WriteVFMA]>;
7760b57cec5SDimitry Andric// def : ReadAdvance<A57ReadVFMA, 4, [A57WriteVMUL]>;
7770b57cec5SDimitry Andricdef A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>;
7780b57cec5SDimitry Andric
// Route the generic multiply-accumulate write and read classes to the
// records above.
7790b57cec5SDimitry Andricdef : SchedAlias<WriteFPMAC32, A57WriteVFMA>;
7800b57cec5SDimitry Andricdef : SchedAlias<WriteFPMAC64, A57WriteVFMA>;
7810b57cec5SDimitry Andricdef : SchedAlias<ReadFPMAC, A57ReadVFMA5>;
7820b57cec5SDimitry Andric
7830b57cec5SDimitry Andric// VMLAH/VMLSH are not bound to scheduling classes by default, so bind them here:
// One write (A57WriteVFMA) followed by three reads: the accumulator read with
// the 5-cycle advance, then two plain multiply-operand reads.
7840b57cec5SDimitry Andricdef : InstRW<[A57WriteVFMA, A57ReadVFMA5, ReadFPMUL, ReadFPMUL],
7850b57cec5SDimitry Andric  (instregex "VMLAH", "VMLSH", "VNMLAH", "VNMLSH")>;
7860b57cec5SDimitry Andric
// Dot-product instructions are modelled with the FP multiply write.
7870b57cec5SDimitry Andricdef : InstRW<[A57WriteVMUL],
7880b57cec5SDimitry Andric  (instregex "VUDOTD", "VSDOTD", "VUDOTQ", "VSDOTQ")>;
7890b57cec5SDimitry Andric
// VNEG and VSEL: 3-cycle write on the V unit (unanchored prefix matches).
7900b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>;
7910b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>;
7920b57cec5SDimitry Andric
7930b57cec5SDimitry Andric// --- 3.11 FP Miscellaneous Instructions ---
7940b57cec5SDimitry Andric// VMOV: 3cyc "F0/F1" for imm/reg
7950b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>;
7960b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>;
7970b57cec5SDimitry Andric
7980b57cec5SDimitry Andricdef : InstRW<[A57Write_3cyc_1V], (instregex "VINSH")>;
7990b57cec5SDimitry Andric
8000b57cec5SDimitry Andric// 5cyc L for FP transfer, vfp to core reg,
8010b57cec5SDimitry Andric// 5cyc L for FP transfer, core reg to vfp
8020b57cec5SDimitry Andricdef : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>;
8030b57cec5SDimitry Andric// VMOVRRS/VMOVRRD in common code declared with one WriteFPMOV (instead of 2).
// Bind two writes explicitly here, one per list entry.
8040b57cec5SDimitry Andricdef : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L], (instregex "VMOV(RRS|RRD)")>;
8050b57cec5SDimitry Andric
8060b57cec5SDimitry Andric// 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg
// NOTE(review): the comment above names the F0/F1 pipes, but the write bound
// below is the "_1L_1I" one; confirm which resource is intended.
8070b57cec5SDimitry Andricdef : InstRW<[A57Write_8cyc_1L_1I], (instregex "VMOVDRR")>;
8080b57cec5SDimitry Andric
8090b57cec5SDimitry Andric// --- 3.12 FP Load Instructions ---
// Single FP loads (VLDRD/VLDRS/VLDRH) and VLDMQIA are a 5-cycle write on the
// L pipe.
8100b57cec5SDimitry Andricdef : InstRW<[A57Write_5cyc_1L], (instregex "VLDR(D|S|H)")>;
8110b57cec5SDimitry Andric
8120b57cec5SDimitry Andricdef : InstRW<[A57Write_5cyc_1L], (instregex "VLDMQIA$")>;
8130b57cec5SDimitry Andric
8140b57cec5SDimitry Andric// FP load multiple (VLDM)
8150b57cec5SDimitry Andric
// Writes for unconditional VLDM: 16 entries, two per latency step, from 5 to
// 12 cycles on the L pipe.
8160b57cec5SDimitry Andricdef A57VLDMOpsListUncond : A57WriteLMOpsListType<
8170b57cec5SDimitry Andric               [A57Write_5cyc_1L, A57Write_5cyc_1L,
8180b57cec5SDimitry Andric                A57Write_6cyc_1L, A57Write_6cyc_1L,
8190b57cec5SDimitry Andric                A57Write_7cyc_1L, A57Write_7cyc_1L,
8200b57cec5SDimitry Andric                A57Write_8cyc_1L, A57Write_8cyc_1L,
8210b57cec5SDimitry Andric                A57Write_9cyc_1L, A57Write_9cyc_1L,
8220b57cec5SDimitry Andric                A57Write_10cyc_1L, A57Write_10cyc_1L,
8230b57cec5SDimitry Andric                A57Write_11cyc_1L, A57Write_11cyc_1L,
8240b57cec5SDimitry Andric                A57Write_12cyc_1L, A57Write_12cyc_1L]>;
// A57LMAddrPred<N> (N = 1..7) selects the first 2*N writes. There is no
// explicit entry for N = 8: the NoSchedPred default yields all 16 writes.
8250b57cec5SDimitry Andricdef A57WriteVLDMuncond : SchedWriteVariant<[
8260b57cec5SDimitry Andric  SchedVar<A57LMAddrPred1,  A57VLDMOpsListUncond.Writes[0-1]>,
8270b57cec5SDimitry Andric  SchedVar<A57LMAddrPred2,  A57VLDMOpsListUncond.Writes[0-3]>,
8280b57cec5SDimitry Andric  SchedVar<A57LMAddrPred3,  A57VLDMOpsListUncond.Writes[0-5]>,
8290b57cec5SDimitry Andric  SchedVar<A57LMAddrPred4,  A57VLDMOpsListUncond.Writes[0-7]>,
8300b57cec5SDimitry Andric  SchedVar<A57LMAddrPred5,  A57VLDMOpsListUncond.Writes[0-9]>,
8310b57cec5SDimitry Andric  SchedVar<A57LMAddrPred6,  A57VLDMOpsListUncond.Writes[0-11]>,
8320b57cec5SDimitry Andric  SchedVar<A57LMAddrPred7,  A57VLDMOpsListUncond.Writes[0-13]>,
8330b57cec5SDimitry Andric  SchedVar<NoSchedPred,     A57VLDMOpsListUncond.Writes[0-15]>
8340b57cec5SDimitry Andric]> { let Variadic=1; }
8350b57cec5SDimitry Andric
// Writes for conditional (predicated) VLDM: 16 entries whose latency rises by
// one cycle per entry, from 5 to 20 cycles on the L pipe.
8360b57cec5SDimitry Andricdef A57VLDMOpsListCond : A57WriteLMOpsListType<
8370b57cec5SDimitry Andric               [A57Write_5cyc_1L, A57Write_6cyc_1L,
8380b57cec5SDimitry Andric                A57Write_7cyc_1L, A57Write_8cyc_1L,
8390b57cec5SDimitry Andric                A57Write_9cyc_1L, A57Write_10cyc_1L,
8400b57cec5SDimitry Andric                A57Write_11cyc_1L, A57Write_12cyc_1L,
8410b57cec5SDimitry Andric                A57Write_13cyc_1L, A57Write_14cyc_1L,
8420b57cec5SDimitry Andric                A57Write_15cyc_1L, A57Write_16cyc_1L,
8430b57cec5SDimitry Andric                A57Write_17cyc_1L, A57Write_18cyc_1L,
8440b57cec5SDimitry Andric                A57Write_19cyc_1L, A57Write_20cyc_1L]>;
// A57LMAddrPred<N> (N = 1..7) selects the first 2*N writes; the default
// selects all 16.
8450b57cec5SDimitry Andricdef A57WriteVLDMcond : SchedWriteVariant<[
8460b57cec5SDimitry Andric  SchedVar<A57LMAddrPred1,  A57VLDMOpsListCond.Writes[0-1]>,
8470b57cec5SDimitry Andric  SchedVar<A57LMAddrPred2,  A57VLDMOpsListCond.Writes[0-3]>,
8480b57cec5SDimitry Andric  SchedVar<A57LMAddrPred3,  A57VLDMOpsListCond.Writes[0-5]>,
8490b57cec5SDimitry Andric  SchedVar<A57LMAddrPred4,  A57VLDMOpsListCond.Writes[0-7]>,
8500b57cec5SDimitry Andric  SchedVar<A57LMAddrPred5,  A57VLDMOpsListCond.Writes[0-9]>,
8510b57cec5SDimitry Andric  SchedVar<A57LMAddrPred6,  A57VLDMOpsListCond.Writes[0-11]>,
8520b57cec5SDimitry Andric  SchedVar<A57LMAddrPred7,  A57VLDMOpsListCond.Writes[0-13]>,
8530b57cec5SDimitry Andric  SchedVar<NoSchedPred,     A57VLDMOpsListCond.Writes[0-15]>
8540b57cec5SDimitry Andric]> { let Variadic=1; }
8550b57cec5SDimitry Andric
// Top-level VLDM write (no writeback): predicated instructions use the
// "cond" ladder, all others the "uncond" ladder.
8560b57cec5SDimitry Andricdef A57WriteVLDM : SchedWriteVariant<[
8570b57cec5SDimitry Andric  SchedVar<IsPredicatedPred, [A57WriteVLDMcond]>,
8580b57cec5SDimitry Andric  SchedVar<NoSchedPred,      [A57WriteVLDMuncond]>
8590b57cec5SDimitry Andric]> { let Variadic=1; }
8600b57cec5SDimitry Andric
// End-anchored so the *_UPD records are not matched here.
8610b57cec5SDimitry Andricdef : InstRW<[A57WriteVLDM], (instregex "VLDM(DIA|SIA)$")>;
8620b57cec5SDimitry Andric
// Writes for unconditional VLDM with base writeback: same 5..12-cycle ladder
// as the non-writeback list, with the I pipe added to every write.
8630b57cec5SDimitry Andricdef A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType<
8640b57cec5SDimitry Andric               [A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
8650b57cec5SDimitry Andric                A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
8660b57cec5SDimitry Andric                A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
8670b57cec5SDimitry Andric                A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
8680b57cec5SDimitry Andric                A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
8690b57cec5SDimitry Andric                A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
8700b57cec5SDimitry Andric                A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I,
8710b57cec5SDimitry Andric                A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I]>;
// NOTE(review): keyed on A57LMAddrPred<N> rather than A57LMAddrUpdPred<N>;
// confirm this matches the operand count of the VLDM*_UPD records.
8720b57cec5SDimitry Andricdef A57WriteVLDMuncond_UPD : SchedWriteVariant<[
8730b57cec5SDimitry Andric  SchedVar<A57LMAddrPred1,  A57VLDMOpsListUncond_Upd.Writes[0-1]>,
8740b57cec5SDimitry Andric  SchedVar<A57LMAddrPred2,  A57VLDMOpsListUncond_Upd.Writes[0-3]>,
8750b57cec5SDimitry Andric  SchedVar<A57LMAddrPred3,  A57VLDMOpsListUncond_Upd.Writes[0-5]>,
8760b57cec5SDimitry Andric  SchedVar<A57LMAddrPred4,  A57VLDMOpsListUncond_Upd.Writes[0-7]>,
8770b57cec5SDimitry Andric  SchedVar<A57LMAddrPred5,  A57VLDMOpsListUncond_Upd.Writes[0-9]>,
8780b57cec5SDimitry Andric  SchedVar<A57LMAddrPred6,  A57VLDMOpsListUncond_Upd.Writes[0-11]>,
8790b57cec5SDimitry Andric  SchedVar<A57LMAddrPred7,  A57VLDMOpsListUncond_Upd.Writes[0-13]>,
8800b57cec5SDimitry Andric  SchedVar<NoSchedPred,     A57VLDMOpsListUncond_Upd.Writes[0-15]>
8810b57cec5SDimitry Andric]> { let Variadic=1; }
8820b57cec5SDimitry Andric
// Writes for conditional VLDM with base writeback: 5..20-cycle ladder (one
// cycle per entry), each write naming both the L and I pipes.
8830b57cec5SDimitry Andricdef A57VLDMOpsListCond_Upd : A57WriteLMOpsListType<
8840b57cec5SDimitry Andric               [A57Write_5cyc_1L_1I, A57Write_6cyc_1L_1I,
8850b57cec5SDimitry Andric                A57Write_7cyc_1L_1I, A57Write_8cyc_1L_1I,
8860b57cec5SDimitry Andric                A57Write_9cyc_1L_1I, A57Write_10cyc_1L_1I,
8870b57cec5SDimitry Andric                A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I,
8880b57cec5SDimitry Andric                A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I,
8890b57cec5SDimitry Andric                A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I,
8900b57cec5SDimitry Andric                A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I,
8910b57cec5SDimitry Andric                A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I]>;
// A57LMAddrPred<N> (N = 1..7) selects the first 2*N writes; the default
// selects all 16.
8920b57cec5SDimitry Andricdef A57WriteVLDMcond_UPD : SchedWriteVariant<[
8930b57cec5SDimitry Andric  SchedVar<A57LMAddrPred1,  A57VLDMOpsListCond_Upd.Writes[0-1]>,
8940b57cec5SDimitry Andric  SchedVar<A57LMAddrPred2,  A57VLDMOpsListCond_Upd.Writes[0-3]>,
8950b57cec5SDimitry Andric  SchedVar<A57LMAddrPred3,  A57VLDMOpsListCond_Upd.Writes[0-5]>,
8960b57cec5SDimitry Andric  SchedVar<A57LMAddrPred4,  A57VLDMOpsListCond_Upd.Writes[0-7]>,
8970b57cec5SDimitry Andric  SchedVar<A57LMAddrPred5,  A57VLDMOpsListCond_Upd.Writes[0-9]>,
8980b57cec5SDimitry Andric  SchedVar<A57LMAddrPred6,  A57VLDMOpsListCond_Upd.Writes[0-11]>,
8990b57cec5SDimitry Andric  SchedVar<A57LMAddrPred7,  A57VLDMOpsListCond_Upd.Writes[0-13]>,
9000b57cec5SDimitry Andric  SchedVar<NoSchedPred,     A57VLDMOpsListCond_Upd.Writes[0-15]>
9010b57cec5SDimitry Andric]> { let Variadic=1; }
9020b57cec5SDimitry Andric
// Top-level VLDM write for the writeback forms: predicated instructions use
// the "cond" ladder, all others the "uncond" ladder.
9030b57cec5SDimitry Andricdef A57WriteVLDM_UPD : SchedWriteVariant<[
9040b57cec5SDimitry Andric  SchedVar<IsPredicatedPred, [A57WriteVLDMcond_UPD]>,
9050b57cec5SDimitry Andric  SchedVar<NoSchedPred,      [A57WriteVLDMuncond_UPD]>
9060b57cec5SDimitry Andric]> { let Variadic=1; }
9070b57cec5SDimitry Andric
// The writeback write (A57WrBackOne) is listed first, then the variadic
// per-register writes.
9080b57cec5SDimitry Andricdef : InstRW<[A57WrBackOne, A57WriteVLDM_UPD],
9090b57cec5SDimitry Andric  (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>;
9100b57cec5SDimitry Andric
9110b57cec5SDimitry Andric// --- 3.13 FP Store Instructions ---
// Single FP stores are a 1-cycle write on the S pipe; VSTMQIA is a 2-cycle
// write.
9120b57cec5SDimitry Andricdef : InstRW<[A57Write_1cyc_1S], (instregex "VSTR(D|S|H)")>;
9130b57cec5SDimitry Andric
9140b57cec5SDimitry Andricdef : InstRW<[A57Write_2cyc_1S], (instregex "VSTMQIA$")>;
9150b57cec5SDimitry Andric
// VSTM write, "s" flavour (by its name, the single-precision form): A57LMAddrPred<N>
// selects an N-cycle write on the S pipe; the default is 2 cycles.
9160b57cec5SDimitry Andricdef A57WriteVSTMs : SchedWriteVariant<[
9170b57cec5SDimitry Andric    SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
9180b57cec5SDimitry Andric    SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
9190b57cec5SDimitry Andric    SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
9200b57cec5SDimitry Andric    SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
9210b57cec5SDimitry Andric    SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
9220b57cec5SDimitry Andric    SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
9230b57cec5SDimitry Andric    SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
9240b57cec5SDimitry Andric    SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
9250b57cec5SDimitry Andric    SchedVar<NoSchedPred,    [A57Write_2cyc_1S]>
9260b57cec5SDimitry Andric]>;
// VSTM of D registers, no writeback. A57LMAddrPred<N> selects
// A57Write_<2N>cyc_1S; when none of the predicates matches, the 4-cycle write
// is used as the fallback.
def A57WriteVSTMd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S]>,
  SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S]>,
  SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S]>,
  SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S]>,
  SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S]>,
  SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S]>,
  SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S]>,
  SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S]>,
  // Fallback when no A57LMAddrPred<N> applies.
  SchedVar<NoSchedPred, [A57Write_4cyc_1S]>
]>;
// VSTM of S registers with base writeback. Same shape as A57WriteVSTMs, but
// every alternative uses the *_1S_1I write: A57LMAddrPred<N> selects
// A57Write_<N>cyc_1S_1I, and the fallback is the 2-cycle write.
def A57WriteVSTMs_Upd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
  SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
  SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
  SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
  SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
  SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
  SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
  SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
  // Fallback when no A57LMAddrPred<N> applies.
  SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]>
]>;
// VSTM of D registers with base writeback. A57LMAddrPred<N> selects
// A57Write_<2N>cyc_1S_1I.
//
// The NoSchedPred fallback uses the 4-cycle write so that it agrees with the
// non-writeback D-register variant (A57WriteVSTMd falls back to
// A57Write_4cyc_1S, i.e. the same entry as A57LMAddrPred2). It previously
// reused the 2-cycle fallback of the S-register writeback variant.
def A57WriteVSTMd_Upd : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S_1I]>,
    SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S_1I]>,
    SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S_1I]>,
    SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S_1I]>,
    SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S_1I]>,
    SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S_1I]>,
    SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S_1I]>,
    SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S_1I]>,
    SchedVar<NoSchedPred,    [A57Write_4cyc_1S_1I]>
]>;
9600b57cec5SDimitry Andric
// Bind the VSTM variants to the opcodes. The non-writeback patterns are
// anchored with '$' so they do not also match the *_UPD opcodes.
def : InstRW<
    [A57WriteVSTMs],
    (instregex "VSTMSIA$")>;
def : InstRW<
    [A57WriteVSTMd],
    (instregex "VSTMDIA$")>;

// Writeback forms: A57WrBackOne is listed first, then the store write.
def : InstRW<
    [A57WrBackOne, A57WriteVSTMs_Upd],
    (instregex "VSTM(SIA_UPD|SDB_UPD)")>;
def : InstRW<
    [A57WrBackOne, A57WriteVSTMd_Upd],
    (instregex "VSTM(DIA_UPD|DDB_UPD)")>;
9670b57cec5SDimitry Andric
9680b57cec5SDimitry Andric// --- 3.14 ASIMD Integer Instructions ---
9690b57cec5SDimitry Andric
// ASIMD absolute diff (integer VABD): 3cyc F0/F1.
def : InstRW<
    [A57Write_3cyc_1V],
    (instregex "VABD(s|u)")>;

// ASIMD absolute diff accumulate.
//   D-form: 4(1) F1  -> Latency 4, ReadAdvance 3 on the accumulator chain.
//   Q-form: 5(2) F1  -> Latency 5, ReadAdvance 3 on the accumulator chain.
let Latency = 4 in
def A57WriteVABAD : SchedWriteRes<[A57UnitX]>;
def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>;
def : InstRW<
    [A57WriteVABAD, A57ReadVABAD],
    (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>;

let Latency = 5 in
def A57WriteVABAQ : SchedWriteRes<[A57UnitX]>;
def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>;
def : InstRW<
    [A57WriteVABAQ, A57ReadVABAQ],
    (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>;
9820b57cec5SDimitry Andric
// ASIMD absolute diff accumulate long (VABAL): 4(1) F1, i.e. Latency 4 with a
// ReadAdvance of 3 when fed by another A57WriteVABAL.
let Latency = 4 in
def A57WriteVABAL : SchedWriteRes<[A57UnitX]>;
def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>;
def : InstRW<
    [A57WriteVABAL, A57ReadVABAL],
    (instregex "VABAL(s|u)")>;

// ASIMD absolute diff long (VABDL): 3cyc F0/F1.
def : InstRW<
    [A57Write_3cyc_1V],
    (instregex "VABDL(s|u)")>;
9900b57cec5SDimitry Andric
// The following integer ASIMD groups all use A57Write_3cyc_1V.

// ASIMD arith, basic
def : InstRW<[A57Write_3cyc_1V],
    (instregex
        "VADDv", "VADDL", "VADDW",
        "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)",
        "VPADDi", "VPADDL", "VSUBv", "VSUBL", "VSUBW")>;

// ASIMD arith, complex
def : InstRW<[A57Write_3cyc_1V],
    (instregex
        "VABS", "VADDHN", "VHADD", "VHSUB",
        "VQABS", "VQADD", "VQNEG", "VQSUB",
        "VRADDHN", "VRHADD", "VRSUBHN", "VSUBHN")>;

// ASIMD compare
def : InstRW<[A57Write_3cyc_1V],
    (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>;

// ASIMD logical
def : InstRW<[A57Write_3cyc_1V],
    (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>;

// ASIMD max/min
def : InstRW<[A57Write_3cyc_1V],
    (instregex
        "(VMAX|VMIN)(s|u)",
        "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>;
10120b57cec5SDimitry Andric
// ASIMD multiply, D-form.
// Cortex-A57 r1p0 and later have a shorter ASIMD multiply and
// multiply-accumulate latency than r0pX: 4cyc F0 instead of 5cyc F0 here.
def A57WriteVMULD_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred, [A57Write_5cyc_1W]>
]>;
def : InstRW<[A57WriteVMULD_VecInt],
    (instregex
        "VMUL(v8i8|v4i16|v2i32|pd)",
        "VMULsl(v4i16|v2i32)",
        "VQDMULH(sl)?(v4i16|v2i32)",
        "VQRDMULH(sl)?(v4i16|v2i32)")>;
10220b57cec5SDimitry Andric
// ASIMD multiply, Q-form: 5cyc F0 on r1p0 and later, 6cyc F0 on r0px.
def A57WriteVMULQ_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
    SchedVar<NoSchedPred, [A57Write_6cyc_1W]>
]>;
def : InstRW<[A57WriteVMULQ_VecInt],
    (instregex
        "VMUL(v16i8|v8i16|v4i32|pq)",
        "VMULsl(v8i16|v4i32)",
        "VQDMULH(sl)?(v8i16|v4i32)",
        "VQRDMULH(sl)?(v8i16|v4i32)")>;
10300b57cec5SDimitry Andric
// ASIMD multiply accumulate, D-form.
//   Write: 4cyc F0 on r1p0 and later, 5cyc F0 on r0px.
//   Read:  ReadAdvance of 3 (r1p0+) or 4 (r0px) against the same write, which
//          leaves 1cyc for an accumulate sequence in both cases.
def A57WriteVMLAD_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred, [A57Write_5cyc_1W]>
]>;
def A57ReadVMLAD_VecInt : SchedReadVariant<[
    SchedVar<IsR1P0AndLaterPred,
             [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>,
    SchedVar<NoSchedPred,
             [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt],
    (instregex
        "VMLA(sl)?(v8i8|v4i16|v2i32)",
        "VMLS(sl)?(v8i8|v4i16|v2i32)")>;
10430b57cec5SDimitry Andric
// ASIMD multiply accumulate, Q-form.
//   Write: 5cyc F0 on r1p0 and later, 6cyc F0 on r0px.
//   Read:  ReadAdvance of 3 (r1p0+) or 4 (r0px) against the same write, which
//          leaves 2cyc for an accumulate sequence in both cases.
def A57WriteVMLAQ_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
    SchedVar<NoSchedPred, [A57Write_6cyc_1W]>
]>;
def A57ReadVMLAQ_VecInt : SchedReadVariant<[
    SchedVar<IsR1P0AndLaterPred,
             [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>,
    SchedVar<NoSchedPred,
             [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt],
    (instregex
        "VMLA(sl)?(v16i8|v8i16|v4i32)",
        "VMLS(sl)?(v16i8|v8i16|v4i32)")>;
10560b57cec5SDimitry Andric
// ASIMD multiply accumulate long.
//   Write: 4cyc F0 on r1p0 and later, 5cyc F0 on r0px.
//   Read:  ReadAdvance of 3 (r1p0+) or 4 (r0px) against the same write, which
//          leaves 1cyc for an accumulate sequence in both cases.
def A57WriteVMLAL_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred, [A57Write_5cyc_1W]>
]>;
def A57ReadVMLAL_VecInt : SchedReadVariant<[
    SchedVar<IsR1P0AndLaterPred,
             [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>,
    SchedVar<NoSchedPred,
             [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt],
    (instregex "VMLAL(s|u)", "VMLSL(s|u)")>;
10690b57cec5SDimitry Andric
// ASIMD multiply accumulate saturating long.
//   Write: 4cyc F0 on r1p0 and later, 5cyc F0 on r0px.
//   Read:  ReadAdvance of 2 (r1p0+) or 3 (r0px) against the same write, which
//          leaves 2cyc for an accumulate sequence in both cases.
def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred, [A57Write_5cyc_1W]>
]>;
def A57ReadVQDMLAL_VecInt : SchedReadVariant<[
    SchedVar<IsR1P0AndLaterPred,
             [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>,
    SchedVar<NoSchedPred,
             [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]>
]>;
def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
    (instregex "VQDMLAL", "VQDMLSL")>;

// Vector Saturating Rounding Doubling Multiply Accumulate/Subtract Long:
// reuses the VQDMLAL/VQDMLSL scheduling information.
def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
    (instregex "VQRDMLAH", "VQRDMLSH")>;
10870b57cec5SDimitry Andric
// ASIMD multiply long: 4cyc F0 on r1p0 and later, 5cyc F0 on r0px.
def A57WriteVMULL_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred, [A57Write_5cyc_1W]>
]>;
def : InstRW<[A57WriteVMULL_VecInt],
    (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>;
10950b57cec5SDimitry Andric
// ASIMD pairwise add and accumulate: 4cyc F1. A ReadAdvance of 3 against the
// same write leaves 1cyc for an accumulate sequence.
let Latency = 4 in
def A57WriteVPADAL : SchedWriteRes<[A57UnitX]>;
def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>;
def : InstRW<
    [A57WriteVPADAL, A57ReadVPADAL],
    (instregex "VPADAL(s|u)")>;
11010b57cec5SDimitry Andric
// ASIMD shift accumulate: 4cyc F1. A ReadAdvance of 3 against the same write
// leaves 1cyc for an accumulate sequence.
let Latency = 4 in
def A57WriteVSRA : SchedWriteRes<[A57UnitX]>;
def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>;
def : InstRW<
    [A57WriteVSRA, A57ReadVSRA],
    (instregex "VSRA", "VRSRA")>;
11070b57cec5SDimitry Andric
// ASIMD shift by immed, basic
def : InstRW<[A57Write_3cyc_1X],
    (instregex "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>;

// ASIMD shift by immed, complex
def : InstRW<[A57Write_4cyc_1X],
    (instregex
        "VQRSHRN", "VQRSHRUN", "VQSHL(si|ui|su)", "VQSHRN", "VQSHRUN",
        "VRSHR(s|u)", "VRSHRN")>;

// ASIMD shift by immed and insert, basic, D-form
def : InstRW<[A57Write_4cyc_1X],
    (instregex
        "VSLI(v8i8|v4i16|v2i32|v1i64)",
        "VSRI(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by immed and insert, basic, Q-form
def : InstRW<[A57Write_5cyc_1X],
    (instregex
        "VSLI(v16i8|v8i16|v4i32|v2i64)",
        "VSRI(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD shift by register, basic, D-form
def : InstRW<[A57Write_3cyc_1X],
    (instregex "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by register, basic, Q-form
def : InstRW<[A57Write_4cyc_1X],
    (instregex "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD shift by register, complex, D-form (VQRSHL, VQSHL, VRSHL)
def : InstRW<[A57Write_4cyc_1X],
    (instregex
        "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
        "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
        "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by register, complex, Q-form (VQRSHL, VQSHL, VRSHL)
def : InstRW<[A57Write_5cyc_1X],
    (instregex
        "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
        "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
        "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;
11430b57cec5SDimitry Andric
11440b57cec5SDimitry Andric// --- 3.15 ASIMD Floating-Point Instructions ---
// ASIMD FP absolute value
def : InstRW<[A57Write_3cyc_1V],
    (instregex "VABS(fd|fq|hd|hq)")>;

// ASIMD FP arith
def : InstRW<[A57Write_5cyc_1V],
    (instregex
        "VABD(fd|fq|hd|hq)",
        "VADD(fd|fq|hd|hq)",
        "VPADD(f|h)",
        "VSUB(fd|fq|hd|hq)")>;

// VCADD / VCMLA use the same write as the FP arith group above.
def : InstRW<[A57Write_5cyc_1V],
    (instregex "VCADD", "VCMLA")>;

// ASIMD FP compare
def : InstRW<[A57Write_5cyc_1V],
    (instregex
        "VAC(GE|GT|LE|LT)",
        "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>;

// ASIMD FP convert, integer
def : InstRW<[A57Write_5cyc_1V],
    (instregex
        "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)",
        "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)",
        "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>;

// ASIMD FP convert, half-precision: 8cyc F0/F1
def : InstRW<[A57Write_8cyc_1V],
    (instregex
        "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)",
        "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)",
        "VCVT(f2h|h2f)")>;

// ASIMD FP max/min
def : InstRW<[A57Write_5cyc_1V],
    (instregex
        "(VMAX|VMIN)(fd|fq|hd|hq)",
        "(VPMAX|VPMIN)(f|h)",
        "(NEON|VFP)_VMAXNM",
        "(NEON|VFP)_VMINNM")>;
11740b57cec5SDimitry Andric
// ASIMD FP multiply: Latency 5 on A57UnitV.
let Latency = 5 in
def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]>;
def : InstRW<
    [A57WriteVMUL_VecFP],
    (instregex "VMUL(sl)?(fd|fq|hd|hq)")>;

// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence.
// The ReadAdvance of 5 applies when the producer is either an FP
// multiply-accumulate or an FP multiply write.
let Latency = 9 in
def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]>;
def A57ReadVMLA_VecFP
    : SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>;
def : InstRW<
    [A57WriteVMLA_VecFP, A57ReadVMLA_VecFP],
    (instregex
        "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)",
        "(VFMA|VFMS)(fd|fq|hd|hq)")>;
11850b57cec5SDimitry Andric
// ASIMD FP negate
def : InstRW<
    [A57Write_3cyc_1V],
    (instregex "VNEG(fd|f32q|hd|hq)")>;

// ASIMD FP round to integral
def : InstRW<
    [A57Write_5cyc_1V],
    (instregex "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>;
11920b57cec5SDimitry Andric
11930b57cec5SDimitry Andric// --- 3.16 ASIMD Miscellaneous Instructions ---
11940b57cec5SDimitry Andric
// ASIMD bitwise insert
def : InstRW<[A57Write_3cyc_1V],
    (instregex "VBIF", "VBIT", "VBSL", "VBSP")>;

// ASIMD count
def : InstRW<[A57Write_3cyc_1V],
    (instregex "VCLS", "VCLZ", "VCNT")>;

// ASIMD duplicate, core reg: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
    (instregex "VDUP(8|16|32)(d|q)")>;

// ASIMD duplicate, scalar: 3cyc "F0/F1"
def : InstRW<[A57Write_3cyc_1V],
    (instregex "VDUPLN(8|16|32)(d|q)")>;

// ASIMD extract
def : InstRW<[A57Write_3cyc_1V],
    (instregex "VEXT(d|q)(8|16|32|64)")>;

// ASIMD move, immed
def : InstRW<[A57Write_3cyc_1V],
    (instregex
        "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)",
        "VMOVD0",
        "VMOVQ0")>;

// ASIMD move, narrowing
def : InstRW<[A57Write_3cyc_1V],
    (instregex "VMOVN")>;

// ASIMD move, saturating
def : InstRW<[A57Write_4cyc_1X],
    (instregex "VQMOVN")>;
12200b57cec5SDimitry Andric
// ASIMD reciprocal estimate
def : InstRW<[A57Write_5cyc_1V],
    (instregex "VRECPE", "VRSQRTE")>;

// ASIMD reciprocal step, FZ
def : InstRW<[A57Write_9cyc_1V],
    (instregex "VRECPS", "VRSQRTS")>;

// ASIMD reverse, swap, table lookup (1-2 reg)
def : InstRW<[A57Write_3cyc_1V],
    (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>;

// ASIMD table lookup (3-4 reg)
def : InstRW<[A57Write_6cyc_1V],
    (instregex "VTBL(3|4)", "VTBX(3|4)")>;

// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1"
def : InstRW<[A57Write_6cyc_1L_1I],
    (instregex "VGETLN")>;

// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
    (instregex "VSETLN")>;

// The transpose and unzip/zip groups below each list two writes.

// ASIMD transpose
def : InstRW<
    [A57Write_3cyc_1V,
     A57Write_3cyc_1V],
    (instregex "VTRN")>;

// ASIMD unzip/zip, D-form
def : InstRW<
    [A57Write_3cyc_1V,
     A57Write_3cyc_1V],
    (instregex "VUZPd", "VZIPd")>;

// ASIMD unzip/zip, Q-form
def : InstRW<
    [A57Write_6cyc_1V,
     A57Write_6cyc_1V],
    (instregex "VUZPq", "VZIPq")>;
12490b57cec5SDimitry Andric
12500b57cec5SDimitry Andric// --- 3.17 ASIMD Load Instructions ---
12510b57cec5SDimitry Andric
// Overridden via InstRW for this processor.
// Give each generic NEON load/store SchedWrite an empty resource list; one
// anonymous WriteRes record is produced per entry.
foreach W = [WriteVLD1, WriteVLD2, WriteVLD3, WriteVLD4,
             WriteVST1, WriteVST2, WriteVST3, WriteVST4] in
def : WriteRes<W, []>;
12610b57cec5SDimitry Andric
// VLD1, 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency
def : InstRW<
    [A57Write_5cyc_1L],
    (instregex "VLD1(d|q)(8|16|32|64)$")>;
def : InstRW<
    [A57Write_5cyc_1L_1I, A57WrBackOne],
    (instregex "VLD1(d|q)(8|16|32|64)wb")>;

// VLD1, 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency
def : InstRW<
    [A57Write_6cyc_1L],
    (instregex
        "VLD1(d|q)(8|16|32|64)(T|Q)$",
        "VLD1d64(T|Q)Pseudo")>;
def : InstRW<
    [A57Write_6cyc_1L_1I, A57WrBackOne],
    (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>;

// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1"
def : InstRW<
    [A57Write_8cyc_1L_1V],
    (instregex
        "VLD1(LN|DUP)(d|q)(8|16|32)$",
        "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
def : InstRW<
    [A57Write_8cyc_1L_1V_1I, A57WrBackOne],
    (instregex
        "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
        "VLD1LNq(8|16|32)Pseudo_UPD")>;
12790b57cec5SDimitry Andric
// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1"
def : InstRW<
    [A57Write_8cyc_1L_1V],
    (instregex "VLD2(d|q)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
def : InstRW<
    [A57Write_8cyc_1L_1V_1I, A57WrBackOne],
    (instregex "VLD2(d|q)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;

// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1"
def : InstRW<
    [A57Write_9cyc_1L_1V],
    (instregex "VLD2b(8|16|32)$")>;
def : InstRW<
    [A57Write_9cyc_1L_1V_1I, A57WrBackOne],
    (instregex "VLD2b(8|16|32)wb")>;

// ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1"
// Two result writes.
def : InstRW<
    [A57Write_8cyc_1L_1V,
     A57Write_8cyc_1L_1V],
    (instregex
        "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
        "VLD2LN(d|q)(8|16|32)Pseudo$")>;
// Two result writes followed by the writeback write.
def : InstRW<
    [A57Write_8cyc_1L_1V_1I,
     A57Write_8cyc_1L_1V,
     A57WrBackOne],
    (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
// One result write followed by the writeback write.
def : InstRW<
    [A57Write_8cyc_1L_1V_1I, A57WrBackOne],
    (instregex
        "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb",
        "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
13020b57cec5SDimitry Andric
// ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1"
// Three result writes.
def : InstRW<
    [A57Write_9cyc_1L_1V,
     A57Write_9cyc_1L_1V,
     A57Write_9cyc_1L_1V],
    (instregex "VLD3(d|q)(8|16|32)$")>;
// One result write (pseudo forms).
def : InstRW<
    [A57Write_9cyc_1L_1V],
    (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
// Three result writes followed by the writeback write.
def : InstRW<
    [A57Write_9cyc_1L_1V_1I,
     A57Write_9cyc_1L_1V_1I,
     A57Write_9cyc_1L_1V_1I,
     A57WrBackOne],
    (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
// One result write followed by the writeback write (pseudo forms).
def : InstRW<
    [A57Write_9cyc_1L_1V_1I, A57WrBackOne],
    (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1"
def : InstRW<
    [A57Write_8cyc_1L_1V,
     A57Write_8cyc_1L_1V,
     A57Write_8cyc_1L_1V],
    (instregex "VLD3LN(d|q)32$", "VLD3LN(d|q)32Pseudo$")>;
def : InstRW<
    [A57Write_8cyc_1L_1V_1I,
     A57Write_8cyc_1L_1V_1I,
     A57Write_8cyc_1L_1V_1I,
     A57WrBackOne],
    (instregex "VLD3LN(d|q)32_UPD")>;
def : InstRW<
    [A57Write_8cyc_1L_1V_1I, A57WrBackOne],
    (instregex "VLD3LN(d|q)32Pseudo_UPD")>;

// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1"
def : InstRW<
    [A57Write_9cyc_1L_1V,
     A57Write_9cyc_1L_1V,
     A57Write_9cyc_1L_1V],
    (instregex "VLD3LN(d|q)(8|16)$", "VLD3LN(d|q)(8|16)Pseudo$")>;
def : InstRW<
    [A57Write_9cyc_1L_1V_1I,
     A57Write_9cyc_1L_1V_1I,
     A57Write_9cyc_1L_1V_1I,
     A57WrBackOne],
    (instregex "VLD3LN(d|q)(8|16)_UPD")>;
def : InstRW<
    [A57Write_9cyc_1L_1V_1I, A57WrBackOne],
    (instregex "VLD3LN(d|q)(8|16)Pseudo_UPD")>;

// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1"
def : InstRW<
    [A57Write_8cyc_1L_1V,
     A57Write_8cyc_1L_1V,
     A57Write_8cyc_1L_1V],
    (instregex "VLD3DUP(d|q)(8|16|32)$", "VLD3DUP(d|q)(8|16|32)Pseudo$")>;
def : InstRW<
    [A57Write_8cyc_1L_1V_1I,
     A57Write_8cyc_1L_1V_1I,
     A57Write_8cyc_1L_1V_1I,
     A57WrBackOne],
    (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>;
def : InstRW<
    [A57Write_8cyc_1L_1V_1I, A57WrBackOne],
    (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>;
13470b57cec5SDimitry Andric
// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1"
def : InstRW<
    [A57Write_9cyc_1L_1V,
     A57Write_9cyc_1L_1V,
     A57Write_9cyc_1L_1V,
     A57Write_9cyc_1L_1V],
    (instregex "VLD4(d|q)(8|16|32)$")>;
def : InstRW<
    [A57Write_9cyc_1L_1V],
    (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<
    [A57Write_9cyc_1L_1V_1I,
     A57Write_9cyc_1L_1V_1I,
     A57Write_9cyc_1L_1V_1I,
     A57Write_9cyc_1L_1V_1I,
     A57WrBackOne],
    (instregex "VLD4(d|q)(8|16|32)_UPD")>;
def : InstRW<
    [A57Write_9cyc_1L_1V_1I, A57WrBackOne],
    (instregex  "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1"
def : InstRW<
    [A57Write_8cyc_1L_1V,
     A57Write_8cyc_1L_1V,
     A57Write_8cyc_1L_1V,
     A57Write_8cyc_1L_1V],
    (instregex "VLD4LN(d|q)32$", "VLD4LN(d|q)32Pseudo$")>;
def : InstRW<
    [A57Write_8cyc_1L_1V_1I,
     A57Write_8cyc_1L_1V_1I,
     A57Write_8cyc_1L_1V_1I,
     A57Write_8cyc_1L_1V_1I,
     A57WrBackOne],
    (instregex "VLD4LN(d|q)32_UPD")>;
def : InstRW<
    [A57Write_8cyc_1L_1V_1I, A57WrBackOne],
    (instregex "VLD4LN(d|q)32Pseudo_UPD")>;

// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1"
def : InstRW<
    [A57Write_9cyc_1L_1V,
     A57Write_9cyc_1L_1V,
     A57Write_9cyc_1L_1V,
     A57Write_9cyc_1L_1V],
    (instregex "VLD4LN(d|q)(8|16)$", "VLD4LN(d|q)(8|16)Pseudo$")>;
def : InstRW<
    [A57Write_9cyc_1L_1V_1I,
     A57Write_9cyc_1L_1V_1I,
     A57Write_9cyc_1L_1V_1I,
     A57Write_9cyc_1L_1V_1I,
     A57WrBackOne],
    (instregex "VLD4LN(d|q)(8|16)_UPD")>;
def : InstRW<
    [A57Write_9cyc_1L_1V_1I, A57WrBackOne],
    (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>;

// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1"
def : InstRW<
    [A57Write_8cyc_1L_1V,
     A57Write_8cyc_1L_1V,
     A57Write_8cyc_1L_1V,
     A57Write_8cyc_1L_1V],
    (instregex "VLD4DUP(d|q)(8|16|32)$", "VLD4DUP(d|q)(8|16|32)Pseudo$")>;
def : InstRW<
    [A57Write_8cyc_1L_1V_1I,
     A57Write_8cyc_1L_1V_1I,
     A57Write_8cyc_1L_1V_1I,
     A57Write_8cyc_1L_1V_1I,
     A57WrBackOne],
    (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>;
def : InstRW<
    [A57Write_8cyc_1L_1V_1I, A57WrBackOne],
    (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>;
13950b57cec5SDimitry Andric
// --- 3.18 ASIMD Store Instructions ---
//
// Throughout this section the write-back records list A57WrBackOne first and
// then the "_1I" variant of the store write used by the matching plain form.

// ASIMD store, 1 element, multiple, 1 reg: 1cyc S
def : InstRW<[A57Write_1cyc_1S], (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
      (instregex "VST1d(8|16|32|64)wb")>;
// ASIMD store, 1 element, multiple, 2 reg: 2cyc S
def : InstRW<[A57Write_2cyc_1S], (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I],
      (instregex "VST1q(8|16|32|64)wb")>;
// ASIMD store, 1 element, multiple, 3 reg: 3cyc S
def : InstRW<[A57Write_3cyc_1S],
      (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I],
      (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
// ASIMD store, 1 element, multiple, 4 reg: 4cyc S
def : InstRW<[A57Write_4cyc_1S],
      (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I],
      (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
// ASIMD store, 1 element, one lane: 3cyc "F0/F1, S"
// Plain forms: the d-register instruction and the q-register pseudo.
def : InstRW<[A57Write_3cyc_1S_1V],
      (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
// Write-back (_UPD) forms: A57WrBackOne first, then the "_1I" store write.
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
      (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
      (instregex "VST2(d|b)(8|16|32)$")>;
// Write-back forms: A57WrBackOne first, then the "_1I" store write.
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
      (instregex "VST2(b|d)(8|16|32)wb")>;
// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S"
def : InstRW<[A57Write_4cyc_1S_1V],
      (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
// Write-back forms: A57WrBackOne first, then the "_1I" store write.
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
      (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
      (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
// Write-back (_UPD) forms: A57WrBackOne first, then the "_1I" store write.
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
      (instregex "VST2LN(d|q)(8|16|32)_UPD",
                 "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
// ASIMD store, 3 element, multiple, 3 reg
// Modeled with A57Write_3cyc_1S_1V, the same write the 3cyc "F0/F1, S" store
// groups in this section use. The "(oddP|P)seudo" alternation covers both the
// "...Pseudo" and "...oddPseudo" instruction names.
def : InstRW<[A57Write_3cyc_1S_1V],
      (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
// Write-back (_UPD) forms: A57WrBackOne first, then the "_1I" store write.
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
      (instregex "VST3(d|q)(8|16|32)_UPD",
                 "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// ASIMD store, 3 element, one lane
// Uses the same 3cyc store write as the multiple-register form above.
def : InstRW<[A57Write_3cyc_1S_1V],
      (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
// Write-back (_UPD) forms: A57WrBackOne first, then the "_1I" store write.
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
      (instregex "VST3LN(d|q)(8|16|32)_UPD",
                 "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
// ASIMD store, 4 element, multiple, 4 reg
// Modeled with A57Write_4cyc_1S_1V, the same write the 4cyc "F0/F1, S"
// 2-element/4-reg store group uses. The "(oddP|P)seudo" alternation covers
// both the "...Pseudo" and "...oddPseudo" instruction names.
def : InstRW<[A57Write_4cyc_1S_1V],
      (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
// Write-back (_UPD) forms: A57WrBackOne first, then the "_1I" store write.
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
      (instregex "VST4(d|q)(8|16|32)_UPD",
                 "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// ASIMD store, 4 element, one lane
// Uses the 3cyc store write (A57Write_3cyc_1S_1V), like the other one-lane
// store groups in this section.
def : InstRW<[A57Write_3cyc_1S_1V],
      (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
// Write-back (_UPD) forms: A57WrBackOne first, then the "_1I" store write.
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
      (instregex "VST4LN(d|q)(8|16|32)_UPD",
                 "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;

// --- 3.19 Cryptography Extensions ---
// Each record below assigns a single write to every instruction whose name
// starts with the given prefix (patterns are written with a leading '^').

// Crypto AES ops
// AESD, AESE, AESIMC, AESMC: 3cyc F0
def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0
def : InstRW<[A57Write_3cyc_1W], (instregex "^VMULLp64")>;
// Crypto SHA1 xor ops: 6cyc F0/F1
def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
// Crypto SHA1 fast ops: 3cyc F0
def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
// Crypto SHA1 slow ops: 6cyc F0
def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
// Crypto SHA256 fast ops: 3cyc F0
def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>;
// Crypto SHA256 slow ops: 6cyc F0
def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>;

// --- 3.20 CRC ---
// CRC32 instructions, with or without the "t2" name prefix, use the same
// A57Write_3cyc_1W write as the fast crypto ops in section 3.19.
def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>;

// -----------------------------------------------------------------------------
// Common definitions

// WriteNoop uses no processor resources and is given zero latency and zero
// micro-ops.
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
// WriteALU is aliased to a CheckBranchForm / A57BranchForm wrapper around
// A57Write_1cyc_1I. Both wrappers are defined earlier in this file;
// NOTE(review): presumably they select a different write for branch-form
// instructions -- confirm against those definitions.
def : SchedAlias<WriteALU, CheckBranchForm<0, A57BranchForm<A57Write_1cyc_1I>>>;

// Branch and preload writes.
def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>;
def : SchedAlias<WriteBrTbl, A57Write_1cyc_1B_1I>;
def : SchedAlias<WritePreLd, A57Write_4cyc_1L>;

// Generic load and store writes, plus ReadALU with a read advance of 0 cycles.
def : SchedAlias<WriteLd, A57Write_4cyc_1L>;
def : SchedAlias<WriteST, A57Write_1cyc_1S>;
def : ReadAdvance<ReadALU, 0>;

14960b57cec5SDimitry Andric} // SchedModel = CortexA57Model
14970b57cec5SDimitry Andric
1498