xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM85.td (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1*5f757f3fSDimitry Andric//=- ARMScheduleM85.td - ARM Cortex-M85 Scheduling Definitions -*- tablegen -*-=//
2*5f757f3fSDimitry Andric//
3*5f757f3fSDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*5f757f3fSDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
5*5f757f3fSDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*5f757f3fSDimitry Andric//
7*5f757f3fSDimitry Andric//===----------------------------------------------------------------------===//
8*5f757f3fSDimitry Andric//
9*5f757f3fSDimitry Andric// This file defines the machine model for the ARM Cortex-M85 processor.
10*5f757f3fSDimitry Andric//
11*5f757f3fSDimitry Andric// All timing is referred to EX2.  Thus, operands which are needed at EX1 are
12*5f757f3fSDimitry Andric// stated to have a ReadAdvance of -1.  The FP/MVE pipe actually begins at EX3
13*5f757f3fSDimitry Andric// but is described as if it were in EX2 to avoid having unnaturally long latencies
14*5f757f3fSDimitry Andric// with delayed inputs on every instruction.  Instead, whenever an FP instruction
15*5f757f3fSDimitry Andric// must access a GP register or a non-FP instruction (which includes loads/stores)
16*5f757f3fSDimitry Andric// must access an FP register, the operand timing is adjusted:
17*5f757f3fSDimitry Andric//     FP accessing GPR:     read one cycle later, write one cycle later
18*5f757f3fSDimitry Andric//                           NOTE: absolute spec timing already includes this if
19*5f757f3fSDimitry Andric//                                 referenced to EX2
20*5f757f3fSDimitry Andric//     non-FP accessing FPR: read one cycle earlier, write one cycle earlier
21*5f757f3fSDimitry Andric//===----------------------------------------------------------------------===//
22*5f757f3fSDimitry Andric
23*5f757f3fSDimitry Andricdef CortexM85Model : SchedMachineModel { // Machine model record; the definitions that follow attach to it via SchedModel.
24*5f757f3fSDimitry Andric  let IssueWidth = 2;        // Dual issue for most instructions.
25*5f757f3fSDimitry Andric  let MicroOpBufferSize = 0; // M85 is in-order.
26*5f757f3fSDimitry Andric  let LoadLatency = 2;       // Best case for load-use case.
27*5f757f3fSDimitry Andric  let MispredictPenalty = 4; // Mispredict cost for forward branches is 7,
28*5f757f3fSDimitry Andric                             // but 4 works better (deliberately tuned below the real cost).
29*5f757f3fSDimitry Andric  let CompleteModel = 0; // NOTE(review): presumably permits instructions without scheduling info -- confirm against TargetSchedule.td.
30*5f757f3fSDimitry Andric}
31*5f757f3fSDimitry Andric
32*5f757f3fSDimitry Andriclet SchedModel = CortexM85Model in {
33*5f757f3fSDimitry Andric
34*5f757f3fSDimitry Andric//===--------------------------------------------------------------------===//
35*5f757f3fSDimitry Andric// CortexM85 has two ALU, two LOAD, two STORE, a MAC, a BRANCH and two VFP
36*5f757f3fSDimitry Andric// pipes (with three units).  There are three shifters available: one per
37*5f757f3fSDimitry Andric// stage.  NOTE(review): the shifter resources declared below are M85UnitShift1, M85UnitShift2 and M85UnitLShift -- confirm how they map onto the three shifters.
38*5f757f3fSDimitry Andric
39*5f757f3fSDimitry Andricdef M85UnitLoadL  : ProcResource<1> { let BufferSize = 0; }
40*5f757f3fSDimitry Andricdef M85UnitLoadH  : ProcResource<1> { let BufferSize = 0; }
41*5f757f3fSDimitry Andricdef M85UnitLoad   : ProcResGroup<[M85UnitLoadL,M85UnitLoadH]> { let BufferSize = 0; } // Group over the L and H load units.
42*5f757f3fSDimitry Andricdef M85UnitStoreL : ProcResource<1> { let BufferSize = 0; }
43*5f757f3fSDimitry Andricdef M85UnitStoreH : ProcResource<1> { let BufferSize = 0; }
44*5f757f3fSDimitry Andricdef M85UnitStore  : ProcResGroup<[M85UnitStoreL,M85UnitStoreH]> { let BufferSize = 0; } // Group over the L and H store units.
45*5f757f3fSDimitry Andricdef M85UnitALU    : ProcResource<2> { let BufferSize = 0; } // One resource with two units (not split into L/H).
46*5f757f3fSDimitry Andricdef M85UnitShift1 : ProcResource<1> { let BufferSize = 0; }
47*5f757f3fSDimitry Andricdef M85UnitShift2 : ProcResource<1> { let BufferSize = 0; }
48*5f757f3fSDimitry Andricdef M85UnitMAC    : ProcResource<1> { let BufferSize = 0; }
49*5f757f3fSDimitry Andricdef M85UnitBranch : ProcResource<1> { let BufferSize = 0; }
50*5f757f3fSDimitry Andricdef M85UnitVFPAL  : ProcResource<1> { let BufferSize = 0; }
51*5f757f3fSDimitry Andricdef M85UnitVFPAH  : ProcResource<1> { let BufferSize = 0; }
52*5f757f3fSDimitry Andricdef M85UnitVFPA   : ProcResGroup<[M85UnitVFPAL,M85UnitVFPAH]> { let BufferSize = 0; } // Group over the L and H halves of VFP unit A.
53*5f757f3fSDimitry Andricdef M85UnitVFPBL  : ProcResource<1> { let BufferSize = 0; }
54*5f757f3fSDimitry Andricdef M85UnitVFPBH  : ProcResource<1> { let BufferSize = 0; }
55*5f757f3fSDimitry Andricdef M85UnitVFPB   : ProcResGroup<[M85UnitVFPBL,M85UnitVFPBH]> { let BufferSize = 0; } // Group over the L and H halves of VFP unit B.
56*5f757f3fSDimitry Andricdef M85UnitVFPCL  : ProcResource<1> { let BufferSize = 0; }
57*5f757f3fSDimitry Andricdef M85UnitVFPCH  : ProcResource<1> { let BufferSize = 0; }
58*5f757f3fSDimitry Andricdef M85UnitVFPC   : ProcResGroup<[M85UnitVFPCL,M85UnitVFPCH]> { let BufferSize = 0; } // Group over the L and H halves of VFP unit C.
59*5f757f3fSDimitry Andricdef M85UnitVFPD   : ProcResource<1> { let BufferSize = 0; } // Unlike A/B/C, declared as a single unit with no L/H split.
60*5f757f3fSDimitry Andricdef M85UnitVPortL : ProcResource<1> { let BufferSize = 0; }
61*5f757f3fSDimitry Andricdef M85UnitVPortH : ProcResource<1> { let BufferSize = 0; }
62*5f757f3fSDimitry Andricdef M85UnitVPort  : ProcResGroup<[M85UnitVPortL,M85UnitVPortH]> { let BufferSize = 0; } // Group over the L and H vector ports.
63*5f757f3fSDimitry Andricdef M85UnitSIMD   : ProcResource<1> { let BufferSize = 0; }
64*5f757f3fSDimitry Andricdef M85UnitLShift : ProcResource<1> { let BufferSize = 0; }
65*5f757f3fSDimitry Andricdef M85UnitDiv    : ProcResource<1> { let BufferSize = 0; }
66*5f757f3fSDimitry Andric
67*5f757f3fSDimitry Andricdef M85UnitSlot0 : ProcResource<1> { let BufferSize = 0; } // Consumed by M85Slot0Only and by the FP write types defined later in this file.
68*5f757f3fSDimitry Andric
69*5f757f3fSDimitry Andric//===---------------------------------------------------------------------===//
70*5f757f3fSDimitry Andric// Subtarget-specific SchedWrite types which map ProcResources and set latency.
71*5f757f3fSDimitry Andric
72*5f757f3fSDimitry Andricdef : WriteRes<WriteALU, [M85UnitALU]> { let Latency = 1; } // Plain ALU op: one ALU unit, result after 1 cycle.
73*5f757f3fSDimitry Andric
74*5f757f3fSDimitry Andric// Basic ALU with shifts.  Each also occupies M85UnitShift1 in addition to an ALU unit.
75*5f757f3fSDimitry Andriclet Latency = 1 in {
76*5f757f3fSDimitry Andric  def : WriteRes<WriteALUsi,  [M85UnitALU, M85UnitShift1]>;
77*5f757f3fSDimitry Andric  def : WriteRes<WriteALUsr,  [M85UnitALU, M85UnitShift1]>;
78*5f757f3fSDimitry Andric  def : WriteRes<WriteALUSsr, [M85UnitALU, M85UnitShift1]>;
79*5f757f3fSDimitry Andric}
80*5f757f3fSDimitry Andric
81*5f757f3fSDimitry Andric// Compares.  The shifted forms use the same resources as shifted ALU ops but are given Latency 2 rather than 1.
82*5f757f3fSDimitry Andricdef : WriteRes<WriteCMP,   [M85UnitALU]> { let Latency = 1; }
83*5f757f3fSDimitry Andricdef : WriteRes<WriteCMPsi, [M85UnitALU, M85UnitShift1]> { let Latency = 2; }
84*5f757f3fSDimitry Andricdef : WriteRes<WriteCMPsr, [M85UnitALU, M85UnitShift1]> { let Latency = 2; }
85*5f757f3fSDimitry Andric
86*5f757f3fSDimitry Andric// Multiplies.  All use the single MAC unit with Latency 2.
87*5f757f3fSDimitry Andriclet Latency = 2 in {
88*5f757f3fSDimitry Andric  def : WriteRes<WriteMUL16,   [M85UnitMAC]>;
89*5f757f3fSDimitry Andric  def : WriteRes<WriteMUL32,   [M85UnitMAC]>;
90*5f757f3fSDimitry Andric  def : WriteRes<WriteMUL64Lo, [M85UnitMAC]>;
91*5f757f3fSDimitry Andric  def : WriteRes<WriteMUL64Hi, []> { let NumMicroOps = 0; } // High half: no resource and no micro-op of its own; Latency 2 comes from the enclosing let.
92*5f757f3fSDimitry Andric}
93*5f757f3fSDimitry Andric
94*5f757f3fSDimitry Andric// Multiply-accumulates.  Same unit and latency as the plain multiplies.
95*5f757f3fSDimitry Andriclet Latency = 2 in {
96*5f757f3fSDimitry Andricdef : WriteRes<WriteMAC16,   [M85UnitMAC]>;
97*5f757f3fSDimitry Andricdef : WriteRes<WriteMAC32,   [M85UnitMAC]>;
98*5f757f3fSDimitry Andricdef : WriteRes<WriteMAC64Lo, [M85UnitMAC]>;
99*5f757f3fSDimitry Andricdef : WriteRes<WriteMAC64Hi, []> { let NumMicroOps = 0; } // High half: no resource and no micro-op of its own.
100*5f757f3fSDimitry Andric}
101*5f757f3fSDimitry Andric
102*5f757f3fSDimitry Andric// Divisions.  Dedicated divide unit, Latency 7.
103*5f757f3fSDimitry Andricdef : WriteRes<WriteDIV, [M85UnitDiv]> {
104*5f757f3fSDimitry Andric  let Latency = 7;
105*5f757f3fSDimitry Andric}
106*5f757f3fSDimitry Andric
107*5f757f3fSDimitry Andric// Loads/Stores.  The generic writes take one unit from the load/store group; the *Wide variants claim both L and H units explicitly.
108*5f757f3fSDimitry Andricdef : WriteRes<WriteLd,    [M85UnitLoad]> { let Latency = 1; }
109*5f757f3fSDimitry Andricdef : WriteRes<WritePreLd, [M85UnitLoad]> { let Latency = 2; }
110*5f757f3fSDimitry Andricdef : WriteRes<WriteST,    [M85UnitStore]> { let Latency = 2; }
111*5f757f3fSDimitry Andricdef M85WriteLdWide : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH]> { let Latency = 1; }
112*5f757f3fSDimitry Andricdef M85WriteStWide : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH]> { let Latency = 2; }
113*5f757f3fSDimitry Andric
114*5f757f3fSDimitry Andric// Branches.  All three branch writes use the branch unit with Latency 2.
115*5f757f3fSDimitry Andricdef : WriteRes<WriteBr,    [M85UnitBranch]> { let Latency = 2; }
116*5f757f3fSDimitry Andricdef : WriteRes<WriteBrL,   [M85UnitBranch]> { let Latency = 2; }
117*5f757f3fSDimitry Andricdef : WriteRes<WriteBrTbl, [M85UnitBranch]> { let Latency = 2; }
118*5f757f3fSDimitry Andric
119*5f757f3fSDimitry Andric// Noop.  Uses no resource, no micro-op and has zero latency.
120*5f757f3fSDimitry Andricdef : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
121*5f757f3fSDimitry Andric
122*5f757f3fSDimitry Andric//===---------------------------------------------------------------------===//
123*5f757f3fSDimitry Andric// Sched definitions for floating-point instructions
124*5f757f3fSDimitry Andric//
125*5f757f3fSDimitry Andric// Floating point conversions.  Every FP write in this section also consumes M85UnitSlot0.
126*5f757f3fSDimitry Andricdef : WriteRes<WriteFPCVT, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
127*5f757f3fSDimitry Andric  let Latency = 2;
128*5f757f3fSDimitry Andric}
129*5f757f3fSDimitry Andricdef : WriteRes<WriteFPMOV, [M85UnitVPort, M85UnitSlot0]> { let Latency = 1; } // Move needs only a vector port, no VFP unit.
130*5f757f3fSDimitry Andricdef M85WriteFPMOV64 : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> { let Latency = 1; } // 64-bit move claims both vector ports.
131*5f757f3fSDimitry Andric
132*5f757f3fSDimitry Andric// ALU operations (32/64-bit).  These go down the FP pipeline.  32-bit forms take one unit from each group; 64-bit forms claim both L and H units.
133*5f757f3fSDimitry Andricdef : WriteRes<WriteFPALU32, [M85UnitVFPA, M85UnitVPort, M85UnitSlot0]> {
134*5f757f3fSDimitry Andric  let Latency = 2;
135*5f757f3fSDimitry Andric}
136*5f757f3fSDimitry Andricdef : WriteRes<WriteFPALU64, [M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
137*5f757f3fSDimitry Andric  let Latency = 6;
138*5f757f3fSDimitry Andric}
139*5f757f3fSDimitry Andric
140*5f757f3fSDimitry Andric// Multiplication.  Uses VFP unit B rather than A.
141*5f757f3fSDimitry Andricdef : WriteRes<WriteFPMUL32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
142*5f757f3fSDimitry Andric  let Latency = 3;
143*5f757f3fSDimitry Andric}
144*5f757f3fSDimitry Andricdef : WriteRes<WriteFPMUL64, [M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
145*5f757f3fSDimitry Andric  let Latency = 8;
146*5f757f3fSDimitry Andric}
147*5f757f3fSDimitry Andric
148*5f757f3fSDimitry Andric// Multiply-accumulate.  FPMAC goes down the FP Pipeline.  Same resources as FP multiply, with longer latency.
149*5f757f3fSDimitry Andricdef : WriteRes<WriteFPMAC32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
150*5f757f3fSDimitry Andric  let Latency = 5;
151*5f757f3fSDimitry Andric}
152*5f757f3fSDimitry Andricdef : WriteRes<WriteFPMAC64, [M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
153*5f757f3fSDimitry Andric  let Latency = 14;
154*5f757f3fSDimitry Andric}
155*5f757f3fSDimitry Andric
156*5f757f3fSDimitry Andric// Division.   Effective scheduling latency is 3, though real latency is larger.  NOTE(review): the Latency values set below are 14 (32-bit) and 29 (64-bit), not 3 -- confirm which is intended.
157*5f757f3fSDimitry Andricdef : WriteRes<WriteFPDIV32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
158*5f757f3fSDimitry Andric  let Latency = 14;
159*5f757f3fSDimitry Andric}
160*5f757f3fSDimitry Andricdef : WriteRes<WriteFPDIV64, [M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
161*5f757f3fSDimitry Andric  let Latency = 29;
162*5f757f3fSDimitry Andric}
163*5f757f3fSDimitry Andric
164*5f757f3fSDimitry Andric// Square-root.  Effective scheduling latency is 3, though real latency is larger.  NOTE(review): as for division, the values set below are 14 and 29 -- confirm.
165*5f757f3fSDimitry Andricdef : WriteRes<WriteFPSQRT32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
166*5f757f3fSDimitry Andric  let Latency = 14;
167*5f757f3fSDimitry Andric}
168*5f757f3fSDimitry Andricdef : WriteRes<WriteFPSQRT64, [M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
169*5f757f3fSDimitry Andric  let Latency = 29;
170*5f757f3fSDimitry Andric}
171*5f757f3fSDimitry Andric
172*5f757f3fSDimitry Andriclet NumMicroOps = 0 in { // Marker writes: they add an issue constraint without adding a micro-op.
173*5f757f3fSDimitry Andric  def M85SingleIssue : SchedWriteRes<[]> { let SingleIssue = 1; } // No resource; only sets SingleIssue.
174*5f757f3fSDimitry Andric  def M85Slot0Only   : SchedWriteRes<[M85UnitSlot0]> { } // Only consumes the single M85UnitSlot0 resource.
175*5f757f3fSDimitry Andric}
176*5f757f3fSDimitry Andric
177*5f757f3fSDimitry Andric// What pipeline stage operands need to be ready for depending on
178*5f757f3fSDimitry Andric// where they come from.  Note that each M85Read_* name is one stage earlier than the stage in its trailing comment (e.g. M85Read_ISS, advance -1, is for operands needed at EX1).
179*5f757f3fSDimitry Andricdef : ReadAdvance<ReadALUsr, 0>;
180*5f757f3fSDimitry Andricdef : ReadAdvance<ReadMUL, 0>;
181*5f757f3fSDimitry Andricdef : ReadAdvance<ReadMAC, 1>;
182*5f757f3fSDimitry Andricdef : ReadAdvance<ReadALU, 0>;
183*5f757f3fSDimitry Andricdef : ReadAdvance<ReadFPMUL, 0>;
184*5f757f3fSDimitry Andricdef : ReadAdvance<ReadFPMAC, 3>;
185*5f757f3fSDimitry Andricdef M85Read_ISSm1 : SchedReadAdvance<-2>;    // operands needed at ISS
186*5f757f3fSDimitry Andricdef M85Read_ISS : SchedReadAdvance<-1>;    // operands needed at EX1
187*5f757f3fSDimitry Andricdef M85Read_EX1 : SchedReadAdvance<0>;     // operands needed at EX2
188*5f757f3fSDimitry Andricdef M85Read_EX2 : SchedReadAdvance<1>;    // operands needed at EX3
189*5f757f3fSDimitry Andricdef M85Read_EX3 : SchedReadAdvance<2>;    // operands needed at EX4
190*5f757f3fSDimitry Andricdef M85Read_EX4 : SchedReadAdvance<3>;    // operands needed at EX5
191*5f757f3fSDimitry Andricdef M85Write1   : SchedWriteRes<[]> { // Latency-only write: 1 cycle, no resource, no micro-op.
192*5f757f3fSDimitry Andric  let Latency = 1;
193*5f757f3fSDimitry Andric  let NumMicroOps = 0;
194*5f757f3fSDimitry Andric}
195*5f757f3fSDimitry Andricdef M85Write2   : SchedWriteRes<[]> { // Latency-only write: 2 cycles, no resource, no micro-op.
196*5f757f3fSDimitry Andric  let Latency = 2;
197*5f757f3fSDimitry Andric  let NumMicroOps = 0;
198*5f757f3fSDimitry Andric}
199*5f757f3fSDimitry Andricdef M85WriteShift2   : SchedWriteRes<[M85UnitALU, M85UnitShift2]> {} // ALU plus the second shifter; no Latency override is given here.
200*5f757f3fSDimitry Andric
201*5f757f3fSDimitry Andric// Non general purpose instructions may not be dual issued. These
202*5f757f3fSDimitry Andric// use both issue units.  In this model that is expressed with SingleIssue = 1 and an empty resource list.
203*5f757f3fSDimitry Andricdef M85NonGeneralPurpose : SchedWriteRes<[]> {
204*5f757f3fSDimitry Andric  // Assume that these will go down the main ALU pipeline.
205*5f757f3fSDimitry Andric  // In reality, many look likely to stall the whole pipeline.
206*5f757f3fSDimitry Andric  let Latency = 3;
207*5f757f3fSDimitry Andric  let SingleIssue = 1;
208*5f757f3fSDimitry Andric}
209*5f757f3fSDimitry Andric
210*5f757f3fSDimitry Andric// List the non general purpose instructions.  These patterns carry no trailing "$", unlike many patterns later in the file.
211*5f757f3fSDimitry Andricdef : InstRW<[M85NonGeneralPurpose],
212*5f757f3fSDimitry Andric                (instregex "t2MRS", "tSVC", "tBKPT", "t2MSR", "t2DMB", "t2DSB",
213*5f757f3fSDimitry Andric                           "t2ISB", "t2HVC", "t2SMC", "t2UDF", "ERET", "tHINT",
214*5f757f3fSDimitry Andric                           "t2HINT", "t2CLREX", "t2CLRM", "BUNDLE")>;
215*5f757f3fSDimitry Andric
216*5f757f3fSDimitry Andric//===---------------------------------------------------------------------===//
217*5f757f3fSDimitry Andric// Sched definitions for load/store
218*5f757f3fSDimitry Andric//
219*5f757f3fSDimitry Andric// Mark whether the loads/stores must be single-issue
220*5f757f3fSDimitry Andric// Address operands are needed earlier
221*5f757f3fSDimitry Andric// Data operands are needed later
222*5f757f3fSDimitry Andric
223*5f757f3fSDimitry Andriclet NumMicroOps = 0 in { // Latency-only helper writes: no resources, no micro-ops.
224*5f757f3fSDimitry Andric  def M85BaseUpdate : SchedWriteRes<[]> {
225*5f757f3fSDimitry Andric    // Update is bypassable out of EX1
226*5f757f3fSDimitry Andric    let Latency = 0;
227*5f757f3fSDimitry Andric  }
228*5f757f3fSDimitry Andric  def M85MVERBaseUpdate : SchedWriteRes<[]> { let Latency = 1; } // Used below for base writeback on MVE loads/stores (wb/_post/_pre patterns).
229*5f757f3fSDimitry Andric  // Q register base update is available in EX3 to bypass into EX2/ISS.
230*5f757f3fSDimitry Andric  //  Latency=2 matches what we want for ISS, Latency=1 for EX2.  Going
231*5f757f3fSDimitry Andric  //  with 2, as base update into another load/store is most likely.  Could
232*5f757f3fSDimitry Andric  //  change later in an override.
233*5f757f3fSDimitry Andric  def M85MVEQBaseUpdate : SchedWriteRes<[]> { let Latency = 2; }
234*5f757f3fSDimitry Andric  def M85LoadLatency1 : SchedWriteRes<[]> { let Latency = 1; } // Used below as the write for the second result of double-word loads.
235*5f757f3fSDimitry Andric}
236*5f757f3fSDimitry Andricdef M85SlowLoad : SchedWriteRes<[M85UnitLoad]> { let Latency = 2; } // Load with Latency 2, one cycle more than WriteLd.
237*5f757f3fSDimitry Andric
238*5f757f3fSDimitry Andric// Byte and half-word loads should have greater latency than other loads.
239*5f757f3fSDimitry Andric// So should load exclusive?
240*5f757f3fSDimitry Andric
241*5f757f3fSDimitry Andricdef : InstRW<[M85SlowLoad],
242*5f757f3fSDimitry Andric               (instregex "t2LDR(B|H|SB|SH)pc")>;
243*5f757f3fSDimitry Andricdef : InstRW<[M85SlowLoad, M85Read_ISS], // NOTE(review): tLDRspi below looks like a word-sized SP-relative load rather than a byte/half-word one -- confirm it belongs in this group.
244*5f757f3fSDimitry Andric               (instregex "t2LDR(B|H|SB|SH)T", "t2LDR(B|H|SB|SH)i",
245*5f757f3fSDimitry Andric                          "tLDRspi", "tLDR(B|H)i")>;
246*5f757f3fSDimitry Andricdef : InstRW<[M85SlowLoad, M85Read_ISS, M85Read_ISS], // Two address operands, both read with M85Read_ISS.
247*5f757f3fSDimitry Andric               (instregex "t2LDR(B|H|SB|SH)s")>;
248*5f757f3fSDimitry Andricdef : InstRW<[M85SlowLoad, M85Read_ISS, M85Read_ISS],
249*5f757f3fSDimitry Andric               (instregex "tLDR(B|H)r", "tLDR(SB|SH)")>;
250*5f757f3fSDimitry Andricdef : InstRW<[M85SlowLoad, M85BaseUpdate, M85Read_ISS], // Pre/post-indexed: the second write is the updated base (Latency 0).
251*5f757f3fSDimitry Andric               (instregex "t2LDR(B|H|SB|SH)_(POST|PRE)")>;
252*5f757f3fSDimitry Andric
253*5f757f3fSDimitry Andric// Exclusive/acquire/release loads/stores cannot be dual-issued; each entry below therefore includes M85SingleIssue.
254*5f757f3fSDimitry Andricdef : InstRW<[WriteLd, M85SingleIssue, M85Read_ISS],
255*5f757f3fSDimitry Andric               (instregex "t2LDREX$", "t2LDA(EX)?$")>;
256*5f757f3fSDimitry Andricdef : InstRW<[M85WriteLdWide, M85LoadLatency1, M85SingleIssue, M85Read_ISS], // Double-word form: wide load plus a second result write.
257*5f757f3fSDimitry Andric               (instregex "t2LDAEXD$")>;
258*5f757f3fSDimitry Andricdef : InstRW<[M85SlowLoad, M85SingleIssue, M85Read_ISS], // Byte/half-word forms use the slower load write.
259*5f757f3fSDimitry Andric               (instregex "t2LDREX(B|H)", "t2LDA(EX)?(B|H)$")>;
260*5f757f3fSDimitry Andricdef : InstRW<[WriteST, M85SingleIssue, M85Read_EX2, M85Read_ISS],
261*5f757f3fSDimitry Andric               (instregex "t2STREX(B|H)?$", "t2STL(EX)?(B|H)?$")>;
262*5f757f3fSDimitry Andricdef : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_EX2, M85Read_EX2, M85Read_ISS], // NOTE(review): no pattern in this group names t2LDREXD or t2STREXD -- confirm they are handled elsewhere.
263*5f757f3fSDimitry Andric               (instregex "t2STLEXD$")>;
264*5f757f3fSDimitry Andric
265*5f757f3fSDimitry Andric// Load/store multiples end issue groups.  Modelled here with the wide load/store writes plus M85SingleIssue.
266*5f757f3fSDimitry Andric
267*5f757f3fSDimitry Andricdef : InstRW<[M85WriteLdWide, M85SingleIssue, M85Read_ISS],
268*5f757f3fSDimitry Andric               (instregex "(t|t2)LDM(DB|IA)$")>;
269*5f757f3fSDimitry Andricdef : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_ISS],
270*5f757f3fSDimitry Andric               (instregex "(t|t2)STM(DB|IA)$")>;
271*5f757f3fSDimitry Andricdef : InstRW<[M85BaseUpdate, M85WriteLdWide, M85SingleIssue, M85Read_ISS], // Writeback forms list the base-update write first.
272*5f757f3fSDimitry Andric               (instregex "(t|t2)LDM(DB|IA)_UPD$", "tPOP")>;
273*5f757f3fSDimitry Andricdef : InstRW<[M85BaseUpdate, M85WriteStWide, M85SingleIssue, M85Read_ISS],
274*5f757f3fSDimitry Andric               (instregex "(t|t2)STM(DB|IA)_UPD$", "tPUSH")>;
275*5f757f3fSDimitry Andric
276*5f757f3fSDimitry Andric// Load/store doubles.  These use the wide load/store writes (both L and H units) but are not marked single-issue.
277*5f757f3fSDimitry Andric
278*5f757f3fSDimitry Andricdef : InstRW<[M85BaseUpdate, M85WriteStWide,
279*5f757f3fSDimitry Andric              M85Read_EX2, M85Read_EX2, M85Read_ISS],
280*5f757f3fSDimitry Andric               (instregex "t2STRD_(PRE|POST)")>;
281*5f757f3fSDimitry Andricdef : InstRW<[M85WriteStWide, M85Read_EX2, M85Read_EX2, M85Read_ISS], // Two data reads (M85Read_EX2) then the address read (M85Read_ISS).
282*5f757f3fSDimitry Andric               (instregex "t2STRDi")>;
283*5f757f3fSDimitry Andricdef : InstRW<[M85WriteLdWide, M85LoadLatency1, M85BaseUpdate, M85Read_ISS], // Two loaded results, then the updated base.
284*5f757f3fSDimitry Andric               (instregex "t2LDRD_(PRE|POST)")>;
285*5f757f3fSDimitry Andricdef : InstRW<[M85WriteLdWide, M85LoadLatency1, M85Read_ISS],
286*5f757f3fSDimitry Andric               (instregex "t2LDRDi")>;
287*5f757f3fSDimitry Andric
288*5f757f3fSDimitry Andric// Word load / preload.  Grouped by addressing form: pc-relative (no register read), immediate (one address read), register offset (two address reads), pre/post-indexed (extra base-update write).
289*5f757f3fSDimitry Andricdef : InstRW<[WriteLd],
290*5f757f3fSDimitry Andric               (instregex "t2LDRpc", "t2PL[DI]pci", "tLDRpci")>;
291*5f757f3fSDimitry Andricdef : InstRW<[WriteLd, M85Read_ISS],
292*5f757f3fSDimitry Andric               (instregex "t2LDR(i|T)", "t2PL[DI](W)?i", "tLDRi")>;
293*5f757f3fSDimitry Andricdef : InstRW<[WriteLd, M85Read_ISS, M85Read_ISS], // The optional "W" is upper-case to match the immediate pattern above; instregex matching is case-sensitive, so a lower-case "w" never matches the PLDW register-offset form.
294*5f757f3fSDimitry Andric               (instregex "t2LDRs", "t2PL[DI](W)?s", "tLDRr")>;
295*5f757f3fSDimitry Andricdef : InstRW<[WriteLd, M85BaseUpdate, M85Read_ISS],
296*5f757f3fSDimitry Andric               (instregex "t2LDR_(POST|PRE)")>;
297*5f757f3fSDimitry Andric
298*5f757f3fSDimitry Andric// Stores.  The data operand is read with M85Read_EX2 and address operands with M85Read_ISS.
299*5f757f3fSDimitry Andricdef : InstRW<[M85BaseUpdate, WriteST, M85Read_EX2, M85Read_ISS],
300*5f757f3fSDimitry Andric               (instregex "t2STR(B|H)?_(POST|PRE)")>;
301*5f757f3fSDimitry Andricdef : InstRW<[WriteST, M85Read_EX2, M85Read_ISS, M85Read_ISS], // Register-offset forms: two address reads.
302*5f757f3fSDimitry Andric               (instregex "t2STR(B|H)?s$", "tSTR(B|H)?r$")>;
303*5f757f3fSDimitry Andricdef : InstRW<[WriteST, M85Read_EX2, M85Read_ISS],
304*5f757f3fSDimitry Andric               (instregex "t2STR(B|H)?(i|T)", "tSTR(B|H)?i$", "tSTRspi")>;
305*5f757f3fSDimitry Andric
306*5f757f3fSDimitry Andric// TBB/TBH - single-issue only
307*5f757f3fSDimitry Andric
308*5f757f3fSDimitry Andricdef M85TableLoad : SchedWriteRes<[M85UnitLoad]> { let SingleIssue = 1; } // One load unit, single-issue; no Latency override is given here.
309*5f757f3fSDimitry Andric
310*5f757f3fSDimitry Andricdef : InstRW<[M85TableLoad, M85Read_ISS, M85Read_ISS], // Both register operands are read with M85Read_ISS.
311*5f757f3fSDimitry Andric                (instregex "t2TB")>;
312*5f757f3fSDimitry Andric
313*5f757f3fSDimitry Andric// VFP/MVE loads and stores
314*5f757f3fSDimitry Andric//   Note: timing for VLDR/VSTR special has not been broken out
315*5f757f3fSDimitry Andric//   Note 2: see notes at top of file for the reason load latency is 1 and
316*5f757f3fSDimitry Andric//           store data is in EX3.
317*5f757f3fSDimitry Andric
318*5f757f3fSDimitry Andricdef M85LoadSP  : SchedWriteRes<[M85UnitLoad, M85UnitVPort]>; // One load unit plus one vector port.
319*5f757f3fSDimitry Andricdef M85LoadDP  : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
320*5f757f3fSDimitry Andric                                M85UnitVPortL, M85UnitVPortH]>; // Both load units and both vector ports.
321*5f757f3fSDimitry Andricdef M85LoadSys  : SchedWriteRes<[M85UnitLoad, M85UnitVPort,
322*5f757f3fSDimitry Andric                                 M85UnitVFPA, M85UnitVFPB, M85UnitVFPC, M85UnitVFPD]> { // Also occupies a unit of every VFP resource.
323*5f757f3fSDimitry Andric  let Latency = 4;
324*5f757f3fSDimitry Andric}
325*5f757f3fSDimitry Andricdef M85StoreSP : SchedWriteRes<[M85UnitStore, M85UnitVPort]>;
326*5f757f3fSDimitry Andricdef M85StoreDP : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH,
327*5f757f3fSDimitry Andric                                M85UnitVPortL, M85UnitVPortH]>;
328*5f757f3fSDimitry Andricdef M85StoreSys : SchedWriteRes<[M85UnitStore, M85UnitVPort,
329*5f757f3fSDimitry Andric                                 M85UnitVFPA, M85UnitVFPB, M85UnitVFPC, M85UnitVFPD]>;
330*5f757f3fSDimitry Andriclet ReleaseAtCycles = [2,2,1,1], EndGroup = 1 in { // MVE accesses: the first two resources (load/store units) get 2, the vector ports 1; each also sets EndGroup.
331*5f757f3fSDimitry Andric  def M85LoadMVE  : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
332*5f757f3fSDimitry Andric                                   M85UnitVPortL, M85UnitVPortH]>;
333*5f757f3fSDimitry Andric  def M85LoadMVELate  : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
334*5f757f3fSDimitry Andric                                       M85UnitVPortL, M85UnitVPortH]> {
335*5f757f3fSDimitry Andric    let Latency = 4; // 3 cycles later
336*5f757f3fSDimitry Andric  }
337*5f757f3fSDimitry Andric  def M85StoreMVE : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH,
338*5f757f3fSDimitry Andric                                   M85UnitVPortL, M85UnitVPortH]>;
339*5f757f3fSDimitry Andric}
340*5f757f3fSDimitry Andric
341*5f757f3fSDimitry Andricdef : InstRW<[M85LoadSP, M85Read_ISS], (instregex "VLDR(S|H)$")>; // Scalar FP loads/stores: address read with M85Read_ISS throughout this group.
342*5f757f3fSDimitry Andricdef : InstRW<[M85LoadSys, M85Read_ISS], (instregex "VLDR_")>;
343*5f757f3fSDimitry Andricdef : InstRW<[M85LoadDP, M85Read_ISS], (instregex "VLDRD$")>;
344*5f757f3fSDimitry Andricdef : InstRW<[M85StoreSP, M85Read_EX3, M85Read_ISS], (instregex "VSTR(S|H)$")>; // Store data is read with M85Read_EX3.
345*5f757f3fSDimitry Andricdef : InstRW<[M85StoreSys, M85Read_EX1, M85Read_ISS], (instregex "VSTR_")>; // The "VSTR_" forms read their data with M85Read_EX1 instead.
346*5f757f3fSDimitry Andricdef : InstRW<[M85StoreDP, M85Read_EX3, M85Read_ISS], (instregex "VSTRD$")>;
347*5f757f3fSDimitry Andric
348*5f757f3fSDimitry Andricdef : InstRW<[M85LoadMVELate, M85Read_ISS], // MVE_VLD2x/VLD4x use the late (Latency 4) load write.
349*5f757f3fSDimitry Andric               (instregex "MVE_VLD[24]._[0-9]+$")>;
350*5f757f3fSDimitry Andricdef : InstRW<[M85LoadMVELate, M85MVERBaseUpdate, M85Read_ISS],
351*5f757f3fSDimitry Andric               (instregex "MVE_VLD[24].*wb")>;
352*5f757f3fSDimitry Andricdef : InstRW<[M85LoadMVE, M85Read_ISS],
353*5f757f3fSDimitry Andric               (instregex "MVE_VLDR.*(8|16|32|64)$")>;
354*5f757f3fSDimitry Andricdef : InstRW<[M85LoadMVE, M85SingleIssue, M85Read_ISS, M85Read_ISS], // Names ending in _rq or _rq_u; the alternation previously listed "_rq" twice, so dropping the duplicate matches the same names.
355*5f757f3fSDimitry Andric               (instregex "MVE_VLDR.*(_rq|_rq_u)$")>;
356*5f757f3fSDimitry Andricdef : InstRW<[M85LoadMVE, M85SingleIssue, M85Read_ISS],
357*5f757f3fSDimitry Andric               (instregex "MVE_VLDR.*_qi$")>;
358*5f757f3fSDimitry Andricdef : InstRW<[M85MVERBaseUpdate, M85LoadMVE, M85Read_ISS], // "[^i]_pre" keeps names ending in "qi_pre" out of this entry; they are matched by the next one.
359*5f757f3fSDimitry Andric               (instregex "MVE_VLDR.*(_post|[^i]_pre)$")>;
360*5f757f3fSDimitry Andricdef : InstRW<[M85MVEQBaseUpdate, M85SingleIssue, M85LoadMVE, M85Read_ISS],
361*5f757f3fSDimitry Andric               (instregex "MVE_VLDR.*(qi_pre)$")>;
362*5f757f3fSDimitry Andric
363*5f757f3fSDimitry Andricdef : InstRW<[M85StoreMVE, M85Read_EX3, M85Read_ISS], // MVE stores: data read with M85Read_EX3, address with M85Read_ISS.
364*5f757f3fSDimitry Andric               (instregex "MVE_VST[24]._[0-9]+$")>;
365*5f757f3fSDimitry Andricdef : InstRW<[M85StoreMVE, M85Read_EX3, M85MVERBaseUpdate, M85Read_ISS],
366*5f757f3fSDimitry Andric               (instregex "MVE_VST[24].*wb")>;
367*5f757f3fSDimitry Andricdef : InstRW<[M85StoreMVE, M85Read_EX3, M85Read_ISS],
368*5f757f3fSDimitry Andric               (instregex "MVE_VSTR.*(8|16|32|64)$")>;
369*5f757f3fSDimitry Andricdef : InstRW<[M85StoreMVE, M85SingleIssue, M85Read_EX3, M85Read_ISS, M85Read_ISS], // Names ending in _rq or _rq_u; the alternation previously listed "_rq" twice, so dropping the duplicate matches the same names.
370*5f757f3fSDimitry Andric               (instregex "MVE_VSTR.*(_rq|_rq_u)$")>;
371*5f757f3fSDimitry Andricdef : InstRW<[M85StoreMVE, M85SingleIssue, M85Read_EX3, M85Read_ISS],
372*5f757f3fSDimitry Andric               (instregex "MVE_VSTR.*_qi$")>;
373*5f757f3fSDimitry Andricdef : InstRW<[M85MVERBaseUpdate, M85StoreMVE, M85Read_EX3, M85Read_ISS], // "[^i]_pre" keeps names ending in "qi_pre" out of this entry; they are matched by the next one.
374*5f757f3fSDimitry Andric               (instregex "MVE_VSTR.*(_post|[^i]_pre)$")>;
375*5f757f3fSDimitry Andricdef : InstRW<[M85MVEQBaseUpdate, M85SingleIssue, M85StoreMVE,
376*5f757f3fSDimitry Andric              M85Read_EX3, M85Read_ISS],
377*5f757f3fSDimitry Andric               (instregex "MVE_VSTR.*(qi_pre)$")>;
378*5f757f3fSDimitry Andric
379*5f757f3fSDimitry Andric// Load/store multiples end issue groups.  As with the integer LDM/STM entries, these use the wide writes plus M85SingleIssue.
380*5f757f3fSDimitry Andric
381*5f757f3fSDimitry Andricdef : InstRW<[M85WriteLdWide, M85SingleIssue, M85Read_ISS],
382*5f757f3fSDimitry Andric               (instregex "VLDM(S|D|Q)(DB|IA)$")>;
383*5f757f3fSDimitry Andricdef : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_ISS, M85Read_EX3], // Here the address read (M85Read_ISS) is listed before the M85Read_EX3 read.
384*5f757f3fSDimitry Andric               (instregex "VSTM(S|D|Q)(DB|IA)$")>;
385*5f757f3fSDimitry Andricdef : InstRW<[M85BaseUpdate, M85WriteLdWide, M85SingleIssue, M85Read_ISS],
386*5f757f3fSDimitry Andric               (instregex "VLDM(S|D|Q)(DB|IA)_UPD$", "VLLDM")>;
387*5f757f3fSDimitry Andricdef : InstRW<[M85BaseUpdate, M85WriteStWide, M85SingleIssue,
388*5f757f3fSDimitry Andric              M85Read_ISS, M85Read_EX3],
389*5f757f3fSDimitry Andric               (instregex "VSTM(S|D|Q)(DB|IA)_UPD$", "VLSTM")>;
390*5f757f3fSDimitry Andric
391*5f757f3fSDimitry Andric//===---------------------------------------------------------------------===//
392*5f757f3fSDimitry Andric// Sched definitions for ALU
393*5f757f3fSDimitry Andric//
394*5f757f3fSDimitry Andric
395*5f757f3fSDimitry Andric// Non-small shifted ALU operands are read a cycle early; small LSLs
396*5f757f3fSDimitry Andric// aren't, as they don't require the shifter.  NOTE(review): the variants below have only a NoSchedPred case, so no small-LSL distinction is actually made here -- confirm.
397*5f757f3fSDimitry Andric
398*5f757f3fSDimitry Andricdef M85NonsmallShiftWrite : SchedWriteRes<[M85UnitALU,M85UnitShift1]> {
399*5f757f3fSDimitry Andric  let Latency = 1;
400*5f757f3fSDimitry Andric}
401*5f757f3fSDimitry Andric
402*5f757f3fSDimitry Andricdef M85WriteALUsi : SchedWriteVariant<[ // Single unconditional case: always resolves to M85NonsmallShiftWrite.
403*5f757f3fSDimitry Andric  SchedVar<NoSchedPred, [M85NonsmallShiftWrite]>
404*5f757f3fSDimitry Andric]>;
405*5f757f3fSDimitry Andricdef M85Ex1ReadNoFastBypass : SchedReadAdvance<-1, // Advance of -1 restricted to the listed load writes; not referenced in the portion of the file visible here.
406*5f757f3fSDimitry Andric                                   [WriteLd, M85WriteLdWide, M85LoadLatency1]>;
407*5f757f3fSDimitry Andricdef M85ReadALUsi : SchedReadVariant<[ // Single unconditional case: always resolves to M85Read_ISS.
408*5f757f3fSDimitry Andric  SchedVar<NoSchedPred, [M85Read_ISS]>
409*5f757f3fSDimitry Andric]>;
410*5f757f3fSDimitry Andric
411*5f757f3fSDimitry Andricdef : InstRW<[M85WriteALUsi, M85Read_EX1, M85ReadALUsi], // Shifted-register forms: first source read with M85Read_EX1, the shifted one via M85ReadALUsi.
412*5f757f3fSDimitry Andric               (instregex "t2(ADC|ADDS|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|"
413*5f757f3fSDimitry Andric                          "SUBS|CMP|CMNz|TEQ|TST)rs$")>;
414*5f757f3fSDimitry Andricdef : InstRW<[M85WriteALUsi, M85ReadALUsi],
415*5f757f3fSDimitry Andric               (instregex "t2MVNs")>;
416*5f757f3fSDimitry Andric
417*5f757f3fSDimitry Andric// CortexM85 treats LSL #0 as needing a shifter. In practice the throughput
418*5f757f3fSDimitry Andric// seems to reliably be 2 when run on a cyclemodel, so we don't require a
419*5f757f3fSDimitry Andric// shift resource.  NOTE(review): the entries below use M85WriteALUsi, which resolves to a write that does list M85UnitShift1 -- confirm this matches the intent stated here.
420*5f757f3fSDimitry Andricdef : InstRW<[M85WriteALUsi, M85Read_EX1, M85ReadALUsi],
421*5f757f3fSDimitry Andric               (instregex "t2(ADC|ADDS|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|"
422*5f757f3fSDimitry Andric                          "SUBS|CMP|CMNz|TEQ|TST)rr$")>;
423*5f757f3fSDimitry Andricdef : InstRW<[M85WriteALUsi, M85ReadALUsi],
424*5f757f3fSDimitry Andric               (instregex "t2MVNr")>;
425*5f757f3fSDimitry Andric
426*5f757f3fSDimitry Andric// Shift instructions: most pure shifts (i.e. MOV w/ shift) will use whichever
427*5f757f3fSDimitry Andric// shifter is free, thus it is possible to dual-issue them freely with anything
428*5f757f3fSDimitry Andric// else.  As a result, they are not modeled as needing a shifter.
429*5f757f3fSDimitry Andric// RRX is odd because it must use the EX2 shifter, so it cannot dual-issue with
430*5f757f3fSDimitry Andric// itself.
431*5f757f3fSDimitry Andric//
432*5f757f3fSDimitry Andric// Note that pure shifts which use the EX1 shifter would need their operands
433*5f757f3fSDimitry Andric// a cycle earlier.  However, they are only forced to use the EX1 shifter
434*5f757f3fSDimitry Andric// when issuing against an RRX instruction, which should be rare.
435*5f757f3fSDimitry Andric
436*5f757f3fSDimitry Andricdef : InstRW<[M85WriteShift2], // RRX: ALU plus M85UnitShift2.
437*5f757f3fSDimitry Andric               (instregex "t2RRX$")>;
438*5f757f3fSDimitry Andricdef : InstRW<[WriteALU], // Other pure shifts and bit-field extracts: plain ALU write, no shifter resource.
439*5f757f3fSDimitry Andric               (instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)", "t2MOVsr(a|l)")>;
440*5f757f3fSDimitry Andric
441*5f757f3fSDimitry Andric// Instructions that use the shifter, but have normal timing.  M85Slot0Only additionally ties them to M85UnitSlot0.
442*5f757f3fSDimitry Andric
443*5f757f3fSDimitry Andricdef : InstRW<[WriteALUsi,M85Slot0Only], (instregex "t2(BFC|BFI)$")>;
444*5f757f3fSDimitry Andric
445*5f757f3fSDimitry Andric// Stack pointer add/sub happens in EX1 with checks in EX2
446*5f757f3fSDimitry Andric
447*5f757f3fSDimitry Andricdef M85WritesToSPPred : MCSchedPredicate<CheckRegOperand<0, SP>>; // True when operand 0 of the instruction is SP.
448*5f757f3fSDimitry Andric
449*5f757f3fSDimitry Andricdef M85ReadForSP : SchedReadVariant<[ // Operand is read with M85Read_ISS when the predicate holds, otherwise with M85Read_EX1.
450*5f757f3fSDimitry Andric  SchedVar<M85WritesToSPPred, [M85Read_ISS]>,
451*5f757f3fSDimitry Andric  SchedVar<NoSchedPred, [M85Read_EX1]>
452*5f757f3fSDimitry Andric]>;
453*5f757f3fSDimitry Andricdef M85ReadForSPShift : SchedReadVariant<[ // NOTE(review): both cases select M85Read_ISS, so the predicate makes no difference here -- confirm this is intended.
454*5f757f3fSDimitry Andric  SchedVar<M85WritesToSPPred, [M85Read_ISS]>,
455*5f757f3fSDimitry Andric  SchedVar<NoSchedPred, [M85Read_ISS]>
456*5f757f3fSDimitry Andric]>;
457*5f757f3fSDimitry Andric
458*5f757f3fSDimitry Andricdef : InstRW<[WriteALU, M85Read_ISS], // These two always read with M85Read_ISS; no predicate is involved.
459*5f757f3fSDimitry Andric               (instregex "tADDspi", "tSUBspi")>;
460*5f757f3fSDimitry Andricdef : InstRW<[WriteALU, M85ReadForSP],
461*5f757f3fSDimitry Andric               (instregex "t2(ADD|SUB)ri", "t2MOVr", "tMOVr")>;
462*5f757f3fSDimitry Andricdef : InstRW<[WriteALU, M85ReadForSP, M85ReadForSP],
463*5f757f3fSDimitry Andric               (instregex "tADDrSP", "tADDspr", "tADDhirr")>;
464*5f757f3fSDimitry Andricdef : InstRW<[M85WriteALUsi, M85ReadForSP, M85ReadForSPShift],
465*5f757f3fSDimitry Andric               (instregex "t2(ADD|SUB)rs")>;
466*5f757f3fSDimitry Andric
467*5f757f3fSDimitry Andricdef : InstRW<[WriteALU, M85Slot0Only], (instregex "t2CLZ")>; // CLZ: plain ALU write, additionally tied to M85UnitSlot0.
468*5f757f3fSDimitry Andric
469*5f757f3fSDimitry Andric// MAC operations that don't have SchedRW set
470*5f757f3fSDimitry Andric
471*5f757f3fSDimitry Andricdef : InstRW<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC], (instregex "t2SML[AS]D")>;
472*5f757f3fSDimitry Andric
473*5f757f3fSDimitry Andric// Divides are special because they stall for their latency, and so look like
474*5f757f3fSDimitry Andric// two cycles as far as scheduling opportunities go.  By putting M85Write2
475*5f757f3fSDimitry Andric// first, we make the operand latency 2, but keep the instruction latency 7.
476*5f757f3fSDimitry Andric// Divide operands are read early.
477*5f757f3fSDimitry Andric
478*5f757f3fSDimitry Andricdef : InstRW<[M85Write2, WriteDIV, M85Read_ISS, M85Read_ISS, WriteALU],
479*5f757f3fSDimitry Andric               (instregex "t2(S|U)DIV")>;
480*5f757f3fSDimitry Andric
481*5f757f3fSDimitry Andric// DSP extension operations
482*5f757f3fSDimitry Andric
483*5f757f3fSDimitry Andricdef M85WriteSIMD1   : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitSlot0]> {
484*5f757f3fSDimitry Andric  let Latency = 1;
485*5f757f3fSDimitry Andric}
486*5f757f3fSDimitry Andricdef M85WriteSIMD2   : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitSlot0]> {
487*5f757f3fSDimitry Andric  let Latency = 2;
488*5f757f3fSDimitry Andric}
489*5f757f3fSDimitry Andricdef M85WriteShSIMD0 : SchedWriteRes<[M85UnitSIMD, M85UnitALU,
490*5f757f3fSDimitry Andric                                       M85UnitShift1, M85UnitSlot0]> {
491*5f757f3fSDimitry Andric  let Latency = 0; // Finishes at EX1
492*5f757f3fSDimitry Andric}
493*5f757f3fSDimitry Andricdef M85WriteShSIMD1 : SchedWriteRes<[M85UnitSIMD, M85UnitALU,
494*5f757f3fSDimitry Andric                                       M85UnitShift1, M85UnitSlot0]> {
495*5f757f3fSDimitry Andric  let Latency = 1;
496*5f757f3fSDimitry Andric}
497*5f757f3fSDimitry Andricdef M85WriteShSIMD2 : SchedWriteRes<[M85UnitSIMD, M85UnitALU,
498*5f757f3fSDimitry Andric                                     M85UnitShift1, M85UnitSlot0]> {
499*5f757f3fSDimitry Andric  let Latency = 2;
500*5f757f3fSDimitry Andric}
501*5f757f3fSDimitry Andric
502*5f757f3fSDimitry Andricdef : InstRW<[M85WriteShSIMD2, M85Read_ISS],
503*5f757f3fSDimitry Andric               (instregex "t2(S|U)SAT")>;
504*5f757f3fSDimitry Andricdef : InstRW<[M85WriteSIMD1, ReadALU],
505*5f757f3fSDimitry Andric               (instregex "(t|t2)(S|U)XT(B|H)")>;
506*5f757f3fSDimitry Andricdef : InstRW<[M85WriteSIMD1, ReadALU, ReadALU],
507*5f757f3fSDimitry Andric               (instregex "t2(S|SH|U|UH)(ADD16|ADD8|ASX|SAX|SUB16|SUB8)",
508*5f757f3fSDimitry Andric                          "t2SEL")>;
509*5f757f3fSDimitry Andricdef : InstRW<[M85WriteSIMD2, ReadALU, ReadALU],
510*5f757f3fSDimitry Andric               (instregex "t2(Q|UQ)(ADD|ASX|SAX|SUB)", "t2USAD8")>;
511*5f757f3fSDimitry Andricdef : InstRW<[M85WriteShSIMD2, M85Read_ISS, M85Read_ISS],
512*5f757f3fSDimitry Andric               (instregex "t2QD(ADD|SUB)")>;
513*5f757f3fSDimitry Andricdef : InstRW<[M85WriteShSIMD0, M85Read_ISS],
514*5f757f3fSDimitry Andric               (instregex "t2(RBIT|REV)", "tREV")>;
515*5f757f3fSDimitry Andricdef : InstRW<[M85WriteShSIMD1, ReadALU, M85Read_ISS],
516*5f757f3fSDimitry Andric               (instregex "t2PKH(BT|TB)", "t2(S|U)XTA")>;
517*5f757f3fSDimitry Andricdef : InstRW<[M85WriteSIMD2, ReadALU, ReadALU, M85Read_EX2],
518*5f757f3fSDimitry Andric               (instregex "t2USADA8")>;
519*5f757f3fSDimitry Andric
520*5f757f3fSDimitry Andric// MSR/MRS
521*5f757f3fSDimitry Andricdef : InstRW<[M85NonGeneralPurpose], (instregex "MSR", "MRS")>;
522*5f757f3fSDimitry Andric
523*5f757f3fSDimitry Andric// 64-bit shift operations in EX3
524*5f757f3fSDimitry Andric
525*5f757f3fSDimitry Andricdef M85WriteLShift : SchedWriteRes<[M85UnitLShift, M85UnitALU]> {
526*5f757f3fSDimitry Andric  let Latency = 2;
527*5f757f3fSDimitry Andric}
528*5f757f3fSDimitry Andricdef M85WriteLat2  : SchedWriteRes<[]>  { let Latency = 2; let NumMicroOps = 0; }
529*5f757f3fSDimitry Andric
530*5f757f3fSDimitry Andricdef : InstRW<[M85WriteLShift, M85WriteLat2, M85Read_EX2, M85Read_EX2],
531*5f757f3fSDimitry Andric               (instregex "MVE_(ASRLi|LSLLi|LSRL|SQSHLL|SRSHRL|UQSHLL|URSHRL)$")>;
532*5f757f3fSDimitry Andricdef : InstRW<[M85WriteLShift, M85WriteLat2,
533*5f757f3fSDimitry Andric              M85Read_EX2, M85Read_EX2, M85Read_EX2],
534*5f757f3fSDimitry Andric               (instregex "MVE_(ASRLr|LSLLr|SQRSHRL|UQRSHLL)$")>;
535*5f757f3fSDimitry Andricdef : InstRW<[M85WriteLShift, M85Read_EX2, M85Read_EX2],
536*5f757f3fSDimitry Andric               (instregex "MVE_(SQRSHR|UQRSHL)$")>;
537*5f757f3fSDimitry Andricdef : InstRW<[M85WriteLShift, M85Read_EX2],
538*5f757f3fSDimitry Andric               (instregex "MVE_(SQSHL|SRSHR|UQSHL|URSHR)$")>;
539*5f757f3fSDimitry Andric
540*5f757f3fSDimitry Andric// Loop control/branch future instructions
541*5f757f3fSDimitry Andric
542*5f757f3fSDimitry Andricdef M85LE   : SchedWriteRes<[]> { let NumMicroOps = 0; let Latency = -2; } // NOTE(review): negative latency; presumably reflects LE executing at ISS, ahead of the EX2 reference point -- confirm
543*5f757f3fSDimitry Andric
544*5f757f3fSDimitry Andricdef : InstRW<[WriteALU], (instregex "t2BF(_|Lr|i|Li|r)")>;
545*5f757f3fSDimitry Andric
546*5f757f3fSDimitry Andricdef : InstRW<[WriteALU], (instregex "MVE_LCTP")>;
547*5f757f3fSDimitry Andricdef : InstRW<[WriteALU],
548*5f757f3fSDimitry Andric               (instregex "t2DLS", "t2WLS", "MVE_DLSTP", "MVE_WLSTP")>;
549*5f757f3fSDimitry Andricdef : InstRW<[M85LE], (instregex "t2LE$")>;
550*5f757f3fSDimitry Andricdef : InstRW<[M85LE, M85Read_ISSm1],
551*5f757f3fSDimitry Andric               (instregex "t2LEUpdate", "MVE_LETP")>;  // LE is executed at ISS
552*5f757f3fSDimitry Andric
553*5f757f3fSDimitry Andric// Conditional selects
554*5f757f3fSDimitry Andric
555*5f757f3fSDimitry Andricdef : InstRW<[M85WriteLShift, M85Read_EX2, M85Read_EX2, M85Read_EX2],
556*5f757f3fSDimitry Andric              (instregex "t2(CSEL|CSINC|CSINV|CSNEG)")>;
557*5f757f3fSDimitry Andric
558*5f757f3fSDimitry Andric//===---------------------------------------------------------------------===//
559*5f757f3fSDimitry Andric// Sched definitions for FP and MVE operations
560*5f757f3fSDimitry Andric
561*5f757f3fSDimitry Andriclet NumMicroOps = 0 in {
562*5f757f3fSDimitry Andric  def M85OverrideVFPLat5 : SchedWriteRes<[]> { let Latency = 5; }
563*5f757f3fSDimitry Andric  def M85OverrideVFPLat4 : SchedWriteRes<[]> { let Latency = 4; }
564*5f757f3fSDimitry Andric  def M85OverrideVFPLat3 : SchedWriteRes<[]> { let Latency = 3; }
565*5f757f3fSDimitry Andric  def M85OverrideVFPLat2 : SchedWriteRes<[]> { let Latency = 2; }
566*5f757f3fSDimitry Andric}
567*5f757f3fSDimitry Andric
568*5f757f3fSDimitry Andriclet Latency = 1 in {
569*5f757f3fSDimitry Andric   def M85GroupALat1S : SchedWriteRes<[M85UnitVFPA, M85UnitVPort, M85UnitSlot0]>;
570*5f757f3fSDimitry Andric   def M85GroupBLat1S : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;
571*5f757f3fSDimitry Andric   def M85GroupCLat1S : SchedWriteRes<[M85UnitVFPC, M85UnitVPort, M85UnitSlot0]>;
572*5f757f3fSDimitry Andric   def M85GroupALat1D : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
573*5f757f3fSDimitry Andric   def M85GroupBLat1D : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
574*5f757f3fSDimitry Andric   def M85GroupCLat1D : SchedWriteRes<[M85UnitVFPCL, M85UnitVFPCH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
575*5f757f3fSDimitry Andric   def M85GroupABLat1S : SchedWriteRes<[M85UnitVPort, M85UnitSlot0]>;
576*5f757f3fSDimitry Andric}
577*5f757f3fSDimitry Andriclet Latency = 2 in {
578*5f757f3fSDimitry Andric   def M85GroupBLat2S : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;
579*5f757f3fSDimitry Andric   def M85GroupBLat2D : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
580*5f757f3fSDimitry Andric   def M85GroupABLat2S : SchedWriteRes<[M85UnitVPort, M85UnitSlot0]>;
581*5f757f3fSDimitry Andric   def M85GroupABLat2D : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
582*5f757f3fSDimitry Andric}
583*5f757f3fSDimitry Andric
584*5f757f3fSDimitry Andric// Instructions which are missing default schedules
585*5f757f3fSDimitry Andricdef : InstRW<[M85GroupALat1S],  (instregex "V(FP_VMAXNM|FP_VMINNM)(H|S)$")>;
586*5f757f3fSDimitry Andricdef : InstRW<[M85GroupALat1D],  (instregex "V(FP_VMAXNM|FP_VMINNM)D$")>;
587*5f757f3fSDimitry Andricdef : InstRW<[M85GroupCLat1S],  (instregex "VCMPE?Z?(H|S)$")>;
588*5f757f3fSDimitry Andricdef : InstRW<[M85GroupCLat1D],  (instregex "VCMPE?Z?D$")>;
589*5f757f3fSDimitry Andricdef : InstRW<[M85GroupBLat2S],
590*5f757f3fSDimitry Andric               (instregex "VCVT(A|M|N|P|R|X|Z)(S|U)(H|S)",
591*5f757f3fSDimitry Andric                          "VRINT(A|M|N|P|R|X|Z)(H|S)")>;
592*5f757f3fSDimitry Andricdef : InstRW<[M85GroupBLat2D],
593*5f757f3fSDimitry Andric               (instregex "VCVT(B|T)(DH|HD)", "VCVT(A|M|N|P|R|X|Z)(S|U)D",
594*5f757f3fSDimitry Andric                          "V.*TOD", "VTO.*D", "VCVTDS", "VCVTSD",
595*5f757f3fSDimitry Andric                          "VRINT(A|M|N|P|R|X|Z)D")>;
596*5f757f3fSDimitry Andricdef : InstRW<[M85GroupABLat1S], (instregex "VINSH")>;
597*5f757f3fSDimitry Andricdef : InstRW<[M85GroupBLat1S],  (instregex "V(ABS|NEG)(H|S)$")>;
598*5f757f3fSDimitry Andricdef : InstRW<[M85GroupBLat1D],  (instregex "V(ABS|NEG)D$")>;
599*5f757f3fSDimitry Andric
600*5f757f3fSDimitry Andric// VMRS/VMSR
601*5f757f3fSDimitry Andriclet SingleIssue = 1 in {
602*5f757f3fSDimitry Andric  def M85VMRSEarly : SchedWriteRes<[M85UnitVPort]> { let Latency = 2;}
603*5f757f3fSDimitry Andric  def M85VMRSLate  : SchedWriteRes<[M85UnitVPort]> { let Latency = 4; }
604*5f757f3fSDimitry Andric  def M85VMSREarly : SchedWriteRes<[M85UnitVPort]> { let Latency = 1; }
605*5f757f3fSDimitry Andric  def M85VMSRLate  : SchedWriteRes<[M85UnitVPort]> { let Latency = 3; }
606*5f757f3fSDimitry Andric}
607*5f757f3fSDimitry Andric
608*5f757f3fSDimitry Andricdef M85FPSCRFlagPred : MCSchedPredicate< // true when operand 0 is a register operand and that register is PC
609*5f757f3fSDimitry Andric                           CheckAll<[CheckIsRegOperand<0>,
610*5f757f3fSDimitry Andric                                     CheckRegOperand<0, PC>]>>;
611*5f757f3fSDimitry Andric
612*5f757f3fSDimitry Andricdef M85VMRSFPSCR : SchedWriteVariant<[ // write for plain VMRS, chosen by M85FPSCRFlagPred
613*5f757f3fSDimitry Andric  SchedVar<M85FPSCRFlagPred, [M85VMRSEarly]>, // predicate holds: M85VMRSEarly (Latency = 2)
614*5f757f3fSDimitry Andric  SchedVar<NoSchedPred, [M85VMRSLate]> // otherwise: M85VMRSLate (Latency = 4)
615*5f757f3fSDimitry Andric]>;
616*5f757f3fSDimitry Andric
617*5f757f3fSDimitry Andricdef : InstRW<[M85VMSREarly, M85Read_EX2],
618*5f757f3fSDimitry Andric               (instregex "VMSR$", "VMSR_FPSCR_NZCVQC", "VMSR_P0", "VMSR_VPR")>;
619*5f757f3fSDimitry Andricdef : InstRW<[M85VMRSEarly], (instregex "VMRS_P0", "VMRS_VPR", "FMSTAT")>;
620*5f757f3fSDimitry Andricdef : InstRW<[M85VMRSLate], (instregex "VMRS_FPSCR_NZCVQC")>;
621*5f757f3fSDimitry Andricdef : InstRW<[M85VMRSFPSCR], (instregex "VMRS$")>;
622*5f757f3fSDimitry Andric// The two records below are disabled because their regexes are not matching properly.
623*5f757f3fSDimitry Andric//def : InstRW<[M85VMSRLate, M85Read_EX2], (instregex "VMSR_FPCTX(NS|S)")>;
624*5f757f3fSDimitry Andric//def : InstRW<[M85VMRSLate], (instregex "VMRS_FPCTX(NS|S)")>;
625*5f757f3fSDimitry Andric
626*5f757f3fSDimitry Andric// VSEL cannot bypass in its implied $cpsr operand; model as earlier read
627*5f757f3fSDimitry Andricdef : InstRW<[M85GroupBLat1S, ReadALU, ReadALU, M85Read_ISS],
628*5f757f3fSDimitry Andric               (instregex "VSEL.*(S|H)$")>;
629*5f757f3fSDimitry Andricdef : InstRW<[M85GroupBLat1D, ReadALU, ReadALU, M85Read_ISS],
630*5f757f3fSDimitry Andric               (instregex "VSEL.*D$")>;
631*5f757f3fSDimitry Andric
632*5f757f3fSDimitry Andric// VMOV
633*5f757f3fSDimitry Andricdef : InstRW<[WriteFPMOV],
634*5f757f3fSDimitry Andric               (instregex "VMOV(H|S)$", "FCONST(H|S)")>;
635*5f757f3fSDimitry Andricdef : InstRW<[WriteFPMOV, M85Read_EX2],
636*5f757f3fSDimitry Andric               (instregex "VMOVHR$", "VMOVSR$")>;
637*5f757f3fSDimitry Andricdef : InstRW<[M85GroupABLat2S],
638*5f757f3fSDimitry Andric               (instregex "VMOVRH$", "VMOVRS$")>;
639*5f757f3fSDimitry Andricdef : InstRW<[M85WriteFPMOV64],
640*5f757f3fSDimitry Andric               (instregex "VMOVD$")>;
641*5f757f3fSDimitry Andricdef : InstRW<[M85WriteFPMOV64],
642*5f757f3fSDimitry Andric               (instregex "FCONSTD")>;
643*5f757f3fSDimitry Andricdef : InstRW<[M85WriteFPMOV64, M85Read_EX2, M85Read_EX2],
644*5f757f3fSDimitry Andric               (instregex "VMOVDRR")>;
645*5f757f3fSDimitry Andricdef : InstRW<[M85WriteFPMOV64, M85Write1, M85Read_EX2, M85Read_EX2],
646*5f757f3fSDimitry Andric               (instregex "VMOVSRR")>;
647*5f757f3fSDimitry Andricdef : InstRW<[M85GroupABLat2D, M85Write2],
648*5f757f3fSDimitry Andric               (instregex "VMOV(RRD|RRS)")>;
649*5f757f3fSDimitry Andric
650*5f757f3fSDimitry Andric// These shouldn't even exist, but Cortex-M55 defines them, so here they are.
651*5f757f3fSDimitry Andricdef : InstRW<[WriteFPMOV, M85Read_EX2],
652*5f757f3fSDimitry Andric               (instregex "VGETLNi32$")>;
653*5f757f3fSDimitry Andricdef : InstRW<[M85GroupABLat2S],
654*5f757f3fSDimitry Andric               (instregex "VSETLNi32")>;
655*5f757f3fSDimitry Andric
656*5f757f3fSDimitry Andric// Larger-latency overrides
657*5f757f3fSDimitry Andric
658*5f757f3fSDimitry Andricdef M85FPDIV16 : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
659*5f757f3fSDimitry Andric  let Latency = 8;
660*5f757f3fSDimitry Andric}
661*5f757f3fSDimitry Andricdef : InstRW<[M85OverrideVFPLat2, M85FPDIV16], (instregex "VDIVH")>;
662*5f757f3fSDimitry Andricdef : InstRW<[M85OverrideVFPLat2, WriteFPDIV32],   (instregex "VDIVS")>;
663*5f757f3fSDimitry Andricdef : InstRW<[M85OverrideVFPLat2, WriteFPDIV64],   (instregex "VDIVD")>;
664*5f757f3fSDimitry Andricdef : InstRW<[M85OverrideVFPLat2, M85FPDIV16], (instregex "VSQRTH")>;
665*5f757f3fSDimitry Andricdef : InstRW<[M85OverrideVFPLat2, WriteFPSQRT32],  (instregex "VSQRTS")>;
666*5f757f3fSDimitry Andricdef : InstRW<[M85OverrideVFPLat2, WriteFPSQRT64],  (instregex "VSQRTD")>;
667*5f757f3fSDimitry Andricdef : InstRW<[M85OverrideVFPLat3, WriteFPMUL64],   (instregex "V(MUL|NMUL)D")>;
668*5f757f3fSDimitry Andricdef : InstRW<[M85OverrideVFPLat2, WriteFPALU64],   (instregex "V(ADD|SUB)D")>;
669*5f757f3fSDimitry Andric
670*5f757f3fSDimitry Andric// Multiply-accumulate.  Chained SP timing is correct; the rest need overrides.
671*5f757f3fSDimitry Andric// Double-precision chained MAC should also be seen as having latency of 5,
672*5f757f3fSDimitry Andric// as stalls stall everything.
673*5f757f3fSDimitry Andric
674*5f757f3fSDimitry Andricdef : InstRW<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL],
675*5f757f3fSDimitry Andric               (instregex "VN?ML(A|S)H")>;
676*5f757f3fSDimitry Andric
677*5f757f3fSDimitry Andricdef : InstRW<[M85OverrideVFPLat5, WriteFPMAC64,
678*5f757f3fSDimitry Andric              ReadFPMUL, ReadFPMUL, ReadFPMUL],
679*5f757f3fSDimitry Andric               (instregex "VN?ML(A|S)D$")>;
680*5f757f3fSDimitry Andric
681*5f757f3fSDimitry Andric// Single-precision fused MACs look like latency 4 with advance of 2.
682*5f757f3fSDimitry Andric
683*5f757f3fSDimitry Andricdef M85ReadFPMAC2   : SchedReadAdvance<2>;
684*5f757f3fSDimitry Andric
685*5f757f3fSDimitry Andricdef : InstRW<[M85OverrideVFPLat4, WriteFPMAC32,
686*5f757f3fSDimitry Andric              M85ReadFPMAC2, ReadFPMUL, ReadFPMUL],
687*5f757f3fSDimitry Andric               (instregex "VF(N)?M(A|S)(H|S)$")>;
688*5f757f3fSDimitry Andric
689*5f757f3fSDimitry Andric// Double-precision fused MAC looks like latency 4.
690*5f757f3fSDimitry Andric
691*5f757f3fSDimitry Andricdef : InstRW<[M85OverrideVFPLat4, WriteFPMAC64,
692*5f757f3fSDimitry Andric              ReadFPMUL, ReadFPMUL, ReadFPMUL],
693*5f757f3fSDimitry Andric               (instregex "VF(N)?M(A|S)D$")>;
694*5f757f3fSDimitry Andric
695*5f757f3fSDimitry Andric// MVE beatwise instructions
696*5f757f3fSDimitry Andric// NOTE: Q-register timing for the 2nd beat is off by a cycle and needs
697*5f757f3fSDimitry Andric//       DAG overrides to correctly set latencies.
698*5f757f3fSDimitry Andric// NOTE2: MVE integer MAC->MAC accumulate latencies are set as if the
699*5f757f3fSDimitry Andric//        accumulate value arrives from a non-matching MAC instruction;
700*5f757f3fSDimitry Andric//        matching ones are handled via DAG mutation.  The affected
701*5f757f3fSDimitry Andric//        entries are marked "limited accumulate bypass".
702*5f757f3fSDimitry Andric
703*5f757f3fSDimitry Andriclet Latency = 4, EndGroup = 1 in {
704*5f757f3fSDimitry Andric   def M85GrpALat2MveR : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
705*5f757f3fSDimitry Andric     let ReleaseAtCycles = [2,2,1,1,1];
706*5f757f3fSDimitry Andric   }
707*5f757f3fSDimitry Andric   def M85GrpABLat2MveR : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
708*5f757f3fSDimitry Andric   def M85GrpBLat2MveR : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
709*5f757f3fSDimitry Andric     let ReleaseAtCycles = [2,2,1,1,1];
710*5f757f3fSDimitry Andric   }
711*5f757f3fSDimitry Andric   def M85Lat2MveR : SchedWriteRes<[]> { let NumMicroOps = 0; }
712*5f757f3fSDimitry Andric   def M85GrpBLat4Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
713*5f757f3fSDimitry Andric     let ReleaseAtCycles = [2,2,1,1,1];
714*5f757f3fSDimitry Andric   }
715*5f757f3fSDimitry Andric}
716*5f757f3fSDimitry Andriclet Latency = 3, EndGroup = 1 in {
717*5f757f3fSDimitry Andric   def M85GrpBLat3Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
718*5f757f3fSDimitry Andric     let ReleaseAtCycles = [2,2,1,1,1];
719*5f757f3fSDimitry Andric   }
720*5f757f3fSDimitry Andric   def M85GrpBLat1MveR : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
721*5f757f3fSDimitry Andric     let ReleaseAtCycles = [2,2,1,1,1];
722*5f757f3fSDimitry Andric   }
723*5f757f3fSDimitry Andric   def M85Lat1MveR : SchedWriteRes<[]> { let NumMicroOps = 0; }
724*5f757f3fSDimitry Andric}
725*5f757f3fSDimitry Andriclet Latency = 2, EndGroup = 1 in {
726*5f757f3fSDimitry Andric   def M85GrpALat2Mve : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
727*5f757f3fSDimitry Andric     let ReleaseAtCycles = [2,2,1,1,1];
728*5f757f3fSDimitry Andric   }
729*5f757f3fSDimitry Andric   def M85GrpABLat2Mve : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
730*5f757f3fSDimitry Andric   def M85GrpBLat2Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
731*5f757f3fSDimitry Andric     let ReleaseAtCycles = [2,2,1,1,1];
732*5f757f3fSDimitry Andric   }
733*5f757f3fSDimitry Andric   def M85Lat2Mve : SchedWriteRes<[]> { let NumMicroOps = 0; }
734*5f757f3fSDimitry Andric}
735*5f757f3fSDimitry Andriclet Latency = 1, EndGroup = 1 in {
736*5f757f3fSDimitry Andric   def M85GrpALat1Mve : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
737*5f757f3fSDimitry Andric     let ReleaseAtCycles = [2,2,1,1,1];
738*5f757f3fSDimitry Andric   }
739*5f757f3fSDimitry Andric   def M85GrpABLat1Mve : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
740*5f757f3fSDimitry Andric   def M85GrpBLat1Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
741*5f757f3fSDimitry Andric     let ReleaseAtCycles = [2,2,1,1,1];
742*5f757f3fSDimitry Andric   }
743*5f757f3fSDimitry Andric   def M85GrpCLat1Mve : SchedWriteRes<[M85UnitVFPCL, M85UnitVFPCH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
744*5f757f3fSDimitry Andric     let ReleaseAtCycles = [2,2,1,1,1];
745*5f757f3fSDimitry Andric   }
746*5f757f3fSDimitry Andric   def M85GrpDLat1Mve : SchedWriteRes<[M85UnitVFPD, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
747*5f757f3fSDimitry Andric     let ReleaseAtCycles = [2,1,1,1];
748*5f757f3fSDimitry Andric   }
749*5f757f3fSDimitry Andric}
750*5f757f3fSDimitry Andric
751*5f757f3fSDimitry Andricdef : InstRW<[M85GrpABLat1Mve, M85Read_EX1, M85Read_EX2, M85Read_EX2],
752*5f757f3fSDimitry Andric                (instregex "MVE_VMOV_q_rr")>;
753*5f757f3fSDimitry Andric
754*5f757f3fSDimitry Andricdef : InstRW<[M85GrpABLat1Mve, M85Read_EX2],
755*5f757f3fSDimitry Andric                (instregex "MVE_VMOV_to_lane_(8|16|32)")>;
756*5f757f3fSDimitry Andric
757*5f757f3fSDimitry Andricdef : InstRW<[M85GrpABLat1Mve],
758*5f757f3fSDimitry Andric                (instregex "MVE_VAND$",
759*5f757f3fSDimitry Andric                           "MVE_VBIC$", "MVE_VBICimm",
760*5f757f3fSDimitry Andric                           "MVE_VCLSs(8|16|32)",
761*5f757f3fSDimitry Andric                           "MVE_VCLZs(8|16|32)",
762*5f757f3fSDimitry Andric                           "MVE_VEOR",
763*5f757f3fSDimitry Andric                           "MVE_VMOVimmf32", "MVE_VMOVimmi(8|16|32|64)",
764*5f757f3fSDimitry Andric                           "MVE_VMVN$", "MVE_VMVNimmi(16|32)",
765*5f757f3fSDimitry Andric                           "MVE_VORN$",
766*5f757f3fSDimitry Andric                           "MVE_VORR$", "MVE_VORRimm", "MQPRCopy",
767*5f757f3fSDimitry Andric                           "MVE_VPSEL",
768*5f757f3fSDimitry Andric                           "MVE_VREV(16|32|64)_(8|16|32)"
769*5f757f3fSDimitry Andric                           )>;
770*5f757f3fSDimitry Andric
771*5f757f3fSDimitry Andricdef : InstRW<[M85GrpABLat2MveR, M85Lat2MveR],
772*5f757f3fSDimitry Andric                (instregex "MVE_VMOV_rr_q")>;
773*5f757f3fSDimitry Andric
774*5f757f3fSDimitry Andricdef : InstRW<[M85GrpABLat2MveR],
775*5f757f3fSDimitry Andric                (instregex "MVE_VMOV_from_lane_(32|u8|s8|u16|s16)")>;
776*5f757f3fSDimitry Andric
777*5f757f3fSDimitry Andricdef : InstRW<[M85GrpALat1Mve, M85Lat1MveR,
778*5f757f3fSDimitry Andric              M85Read_EX1, M85Read_EX1, M85Read_EX2],
779*5f757f3fSDimitry Andric                (instregex "MVE_VADC$")>;
780*5f757f3fSDimitry Andric
781*5f757f3fSDimitry Andricdef : InstRW<[M85GrpALat1Mve, M85Lat1MveR],
782*5f757f3fSDimitry Andric                (instregex "MVE_VADCI")>;
783*5f757f3fSDimitry Andric
784*5f757f3fSDimitry Andricdef : InstRW<[M85GrpALat1Mve, M85Read_EX1, M85Read_EX2],
785*5f757f3fSDimitry Andric                (instregex "MVE_VADD_qr_i(8|16|32)",
786*5f757f3fSDimitry Andric                           "MVE_VBRSR(16|32|8)",
787*5f757f3fSDimitry Andric                           "MVE_VHADD_qr_[su](8|16|32)",
788*5f757f3fSDimitry Andric                           "MVE_VHSUB_qr_[su](8|16|32)",
789*5f757f3fSDimitry Andric                           "MVE_VQADD_qr_[su](8|16|32)",
790*5f757f3fSDimitry Andric                           "MVE_VQSUB_qr_[su](8|16|32)",
791*5f757f3fSDimitry Andric                           "MVE_VSHL_qr[su](8|16|32)",
792*5f757f3fSDimitry Andric                           "MVE_VSUB_qr_i(8|16|32)"
793*5f757f3fSDimitry Andric                )>;
794*5f757f3fSDimitry Andric
795*5f757f3fSDimitry Andricdef : InstRW<[M85GrpALat1Mve],
796*5f757f3fSDimitry Andric                (instregex "MVE_VABD(s|u)(8|16|32)",
797*5f757f3fSDimitry Andric                           "MVE_VABS(s|u)(8|16|32)",
798*5f757f3fSDimitry Andric                           "MVE_V(MAX|MIN)A?[us](8|16|32)",
799*5f757f3fSDimitry Andric                           "MVE_VADDi(8|16|32)",
800*5f757f3fSDimitry Andric                           "MVE_VCADDi(8|16|32)",
801*5f757f3fSDimitry Andric                           "MVE_VHCADDs(8|16|32)",
802*5f757f3fSDimitry Andric                           "MVE_VHSUB[su](8|16|32)",
803*5f757f3fSDimitry Andric                           "MVE_VMOVL[su](8|16)[tb]h",
804*5f757f3fSDimitry Andric                           "MVE_VMOVNi(16|32)[tb]h",
805*5f757f3fSDimitry Andric                           "MVE_VMULL[BT]?[p](8|16|32)(bh|th)?",
806*5f757f3fSDimitry Andric                           "MVE_VNEGs(8|16|32)",
807*5f757f3fSDimitry Andric                           "MVE_VQABSs(8|16|32)",
808*5f757f3fSDimitry Andric                           "MVE_VQADD[su](8|16|32)",
809*5f757f3fSDimitry Andric                           "MVE_VQNEGs(8|16|32)",
810*5f757f3fSDimitry Andric                           "MVE_VQSUB[su](8|16|32)",
811*5f757f3fSDimitry Andric                           "MVE_VR?HADD[su](8|16|32)",
812*5f757f3fSDimitry Andric                           "MVE_VSBC$", "MVE_VSBCI",
813*5f757f3fSDimitry Andric                           "MVE_VSHL_by_vec[su](8|16|32)",
814*5f757f3fSDimitry Andric                           "MVE_VSHL_immi(8|16|32)",
815*5f757f3fSDimitry Andric                           "MVE_VSHLL_imm[su](8|16)[bt]h",
816*5f757f3fSDimitry Andric                           "MVE_VSHLL_lw[su](8|16)[bt]h",
817*5f757f3fSDimitry Andric                           "MVE_VSHRNi(16|32)[bt]h",
818*5f757f3fSDimitry Andric                           "MVE_VSHR_imm[su](8|16|32)",
819*5f757f3fSDimitry Andric                           "MVE_VSLIimm[su]?(8|16|32)",
820*5f757f3fSDimitry Andric                           "MVE_VSRIimm[su]?(8|16|32)",
821*5f757f3fSDimitry Andric                           "MVE_VSUBi(8|16|32)"
822*5f757f3fSDimitry Andric                 )>;
823*5f757f3fSDimitry Andric
824*5f757f3fSDimitry Andricdef : InstRW<[M85GrpALat2Mve, M85Lat2MveR, M85Read_EX2, M85Read_EX2],
825*5f757f3fSDimitry Andric                (instregex "MVE_V(D|I)WDUPu(8|16|32)")>;
826*5f757f3fSDimitry Andric
827*5f757f3fSDimitry Andricdef : InstRW<[M85GrpALat2Mve, M85Lat2MveR, M85Read_EX2],
828*5f757f3fSDimitry Andric                (instregex "MVE_V(D|I)DUPu(8|16|32)")>;
829*5f757f3fSDimitry Andric
830*5f757f3fSDimitry Andricdef : InstRW<[M85GrpALat2Mve, M85Read_EX1, M85Read_EX2],
831*5f757f3fSDimitry Andric                (instregex "MVE_V(Q|R|QR)SHL_qr[su](8|16|32)",
832*5f757f3fSDimitry Andric                           "MVE_VADD_qr_f(16|32)",
833*5f757f3fSDimitry Andric                           "MVE_VSUB_qr_f(16|32)"
834*5f757f3fSDimitry Andric                )>;
835*5f757f3fSDimitry Andric
836*5f757f3fSDimitry Andricdef : InstRW<[M85GrpALat1Mve, M85Read_EX2],
837*5f757f3fSDimitry Andric                (instregex "MVE_VDUP(8|16|32)")>;
838*5f757f3fSDimitry Andric
839*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat1Mve],
840*5f757f3fSDimitry Andric                (instregex "MVE_VABSf(16|32)",
841*5f757f3fSDimitry Andric                           "MVE_V(MAX|MIN)NMA?f(16|32)",
842*5f757f3fSDimitry Andric                           "MVE_VNEGf(16|32)"
843*5f757f3fSDimitry Andric                )>;
844*5f757f3fSDimitry Andric
845*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2MveR, M85Lat2MveR, M85Read_EX3, M85Read_EX3],
846*5f757f3fSDimitry Andric                (instregex "MVE_VADDLV[us]32acc")>;
847*5f757f3fSDimitry Andric
848*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2MveR, M85Lat2MveR],
849*5f757f3fSDimitry Andric                (instregex "MVE_VADDLV[us]32no_acc")>;
850*5f757f3fSDimitry Andric
851*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2MveR, M85Read_EX3],
852*5f757f3fSDimitry Andric                (instregex "MVE_VADDV[us](8|16|32)acc"
853*5f757f3fSDimitry Andric                )>;
854*5f757f3fSDimitry Andric
855*5f757f3fSDimitry Andricdef : InstRW<[M85GrpALat2MveR, M85Read_EX3],
856*5f757f3fSDimitry Andric                (instregex "MVE_V(MAX|MIN)A?V[us](8|16|32)",
857*5f757f3fSDimitry Andric                           "MVE_VABAV(s|u)(8|16|32)"
858*5f757f3fSDimitry Andric                )>;
859*5f757f3fSDimitry Andric
860*5f757f3fSDimitry Andricdef : InstRW<[M85GrpALat2MveR],
861*5f757f3fSDimitry Andric                (instregex "MVE_VADDV[us](8|16|32)no_acc")>;
862*5f757f3fSDimitry Andric
863*5f757f3fSDimitry Andricdef : InstRW<[M85GrpALat2Mve],
864*5f757f3fSDimitry Andric                (instregex "MVE_V(Q|R|QR)SHL_by_vec[su](8|16|32)",
865*5f757f3fSDimitry Andric                           "MVE_VABDf(16|32)",
866*5f757f3fSDimitry Andric                           "MVE_VADDf(16|32)",
867*5f757f3fSDimitry Andric                           "MVE_VCADDf(16|32)",
868*5f757f3fSDimitry Andric                           "MVE_VQMOVU?N[su](8|16|32)[tb]h",
869*5f757f3fSDimitry Andric                           "MVE_VQR?SHL(U_)?imm[su](8|16|32)",
870*5f757f3fSDimitry Andric                           "MVE_VQR?SHRN[bt]h[su](16|32)",
871*5f757f3fSDimitry Andric                           "MVE_VQR?SHRUNs(16|32)[bt]h",
872*5f757f3fSDimitry Andric                           "MVE_VRSHR_imm[su](8|16|32)",
873*5f757f3fSDimitry Andric                           "MVE_VRSHRNi(16|32)[bt]h",
874*5f757f3fSDimitry Andric                           "MVE_VSUBf(16|32)"
875*5f757f3fSDimitry Andric                 )>;
876*5f757f3fSDimitry Andric
877*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2MveR, M85Read_EX2],
878*5f757f3fSDimitry Andric                (instregex "MVE_V(MAX|MIN)NMA?Vf(16|32)")>;
879*5f757f3fSDimitry Andric
880*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2],
881*5f757f3fSDimitry Andric                (instregex "MVE_VMUL_qr_i(8|16|32)")>;
882*5f757f3fSDimitry Andric
883*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2],
884*5f757f3fSDimitry Andric                (instregex "MVE_VQDMULL_qr_s(16|32)[tb]h")>;
885*5f757f3fSDimitry Andric
886*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2],
887*5f757f3fSDimitry Andric                (instregex "MVE_VQR?DMULH_qr_s(8|16|32)")>;
888*5f757f3fSDimitry Andric
889*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX1, M85Read_EX3],
890*5f757f3fSDimitry Andric                // limited accumulate bypass
891*5f757f3fSDimitry Andric                (instregex "MVE_VMLAS?_qr_i(8|16|32)")>;
892*5f757f3fSDimitry Andric
893*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2],
894*5f757f3fSDimitry Andric                // limited accumulate bypass
895*5f757f3fSDimitry Andric                (instregex "MVE_VQR?DMLAS?H_qrs(8|16|32)")>;
896*5f757f3fSDimitry Andric
897*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2Mve],
898*5f757f3fSDimitry Andric                // limited accumulate bypass
899*5f757f3fSDimitry Andric                (instregex "MVE_VQR?DML[AS]DHX?s(8|16|32)")>;
900*5f757f3fSDimitry Andric
// Multiply-accumulate reductions (VMLALDAV / VMLSLDAV, their VR... and ...H
// variants, and VMLADAV / VMLSDAV).
// The "L" (long) forms list two writes, M85GrpBLat2MveR and M85Lat2MveR; the
// non-long forms list only M85GrpBLat2MveR.
// Names containing "a" before the optional "x" additionally list M85Read_EX3
// reads: two for the long forms, one for the non-long forms.  The patterns
// without "a" require [su] right after the optional "x", so they cannot match
// the "a" names.
// NOTE(review): presumably the two writes are the two halves of a 64-bit
// result and the EX3 reads are the incoming accumulator -- confirm.
901*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2MveR, M85Lat2MveR, M85Read_EX3, M85Read_EX3],
902*5f757f3fSDimitry Andric                (instregex "MVE_VR?ML[AS]LDAVH?ax?[su](8|16|32)")>;
903*5f757f3fSDimitry Andric
904*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2MveR, M85Lat2MveR],
905*5f757f3fSDimitry Andric                (instregex "MVE_VR?ML[AS]LDAVH?x?[su](8|16|32)")>;
906*5f757f3fSDimitry Andric
907*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2MveR, M85Read_EX3],
908*5f757f3fSDimitry Andric                (instregex "MVE_VML[AS]DAVax?[su](8|16|32)")>;
909*5f757f3fSDimitry Andric
910*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2MveR],
911*5f757f3fSDimitry Andric                (instregex "MVE_VML[AS]DAVx?[su](8|16|32)")>;
912*5f757f3fSDimitry Andric
// Instructions that use the plain M85GrpBLat2Mve write with no explicit
// reads: int<->float and f16<->f32 VCVT forms, VMULL, vector-by-vector
// integer VMUL, VQDMULL, VQDMULH / VQRDMULH, VMULH / VRMULH and VRINT.
// NOTE(review): none of these patterns ends in "$", so they would also match
// longer names starting with the same text if instregex matches as a prefix
// -- confirm that is the intended coverage.
913*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat2Mve],
914*5f757f3fSDimitry Andric                (instregex "MVE_VCVTf16(u|s)16", "MVE_VCVTf32(u|s)32",
915*5f757f3fSDimitry Andric                           "MVE_VCVT(u|s)16f16", "MVE_VCVT(u|s)32f32",
916*5f757f3fSDimitry Andric                           "MVE_VCVTf16f32", "MVE_VCVTf32f16",
917*5f757f3fSDimitry Andric                           "MVE_VMULL[BT]?[su](8|16|32)(bh|th)?",
918*5f757f3fSDimitry Andric                           "MVE_VMUL(t1)*i(8|16|32)",
919*5f757f3fSDimitry Andric                           "MVE_VQDMULLs(16|32)[tb]h",
920*5f757f3fSDimitry Andric                           "MVE_VQR?DMULHi(8|16|32)",
921*5f757f3fSDimitry Andric                           "MVE_VR?MULH[su](8|16|32)",
922*5f757f3fSDimitry Andric                           "MVE_VRINTf(16|32)"
923*5f757f3fSDimitry Andric                )>;
924*5f757f3fSDimitry Andric
// Floating-point multiplies, written with M85GrpBLat3Mve.
// The "_qr_" form of VMUL also lists an M85Read_EX1 and an M85Read_EX2 read;
// VCMUL and the non-"_qr_" VMUL list no explicit reads.
925*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat3Mve, M85Read_EX1, M85Read_EX2],
926*5f757f3fSDimitry Andric                (instregex "MVE_VMUL_qr_f(16|32)")>;
927*5f757f3fSDimitry Andric
928*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat3Mve],
929*5f757f3fSDimitry Andric                (instregex "MVE_VCMULf(16|32)",
930*5f757f3fSDimitry Andric                           "MVE_VMULf(16|32)"
931*5f757f3fSDimitry Andric                )>;
932*5f757f3fSDimitry Andric
// Floating-point multiply-accumulate, written with M85GrpBLat4Mve.
// Every entry lists M85Read_EX3 as its first read; the "_qr_" VFMA / VFMAS
// forms follow it with an M85Read_EX1 and an M85Read_EX2 read.
// NOTE(review): the leading EX3 read presumably models the accumulator
// operand being needed late -- confirm against the operand order of these
// instructions.
933*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat4Mve, M85Read_EX3, M85Read_EX1, M85Read_EX2],
934*5f757f3fSDimitry Andric                (instregex "MVE_VFMA_qr_Sf(16|32)", // VFMAS
935*5f757f3fSDimitry Andric                           "MVE_VFMA_qr_f(16|32)" // VFMA
936*5f757f3fSDimitry Andric                )>;
937*5f757f3fSDimitry Andric
938*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat4Mve, M85Read_EX3],
939*5f757f3fSDimitry Andric                (instregex "MVE_VCMLAf(16|32)")>;
940*5f757f3fSDimitry Andric
941*5f757f3fSDimitry Andricdef : InstRW<[M85GrpBLat4Mve, M85Read_EX3],
942*5f757f3fSDimitry Andric                (instregex "MVE_VFM(A|S)f(16|32)")>;
943*5f757f3fSDimitry Andric
// VPT and VCMP names carrying the trailing "r", written with M85GrpCLat1Mve.
// The VPT...r entries list reads EX1, EX1, EX2; the VCMP...r entries list
// reads EX1, EX2.
// NOTE(review): the "r" suffix presumably denotes the vector-versus-scalar-
// register form -- confirm against the instruction definitions.
944*5f757f3fSDimitry Andricdef : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2],
945*5f757f3fSDimitry Andric                (instregex "MVE_VPTv(4|8)f(16|32)r")>;
946*5f757f3fSDimitry Andric
947*5f757f3fSDimitry Andricdef : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2],
948*5f757f3fSDimitry Andric                (instregex "MVE_VPTv(4|8|16)(i|s|u)(8|16|32)r")>;
949*5f757f3fSDimitry Andric
950*5f757f3fSDimitry Andricdef : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX2],
951*5f757f3fSDimitry Andric                (instregex "MVE_VCMP[isu](8|16|32)r$", "MVE_VCMPf(16|32)r$")>;
952*5f757f3fSDimitry Andric
// VCTP for element sizes 8/16/32/64: one M85GrpDLat1Mve write and one
// M85Read_EX2 read.
953*5f757f3fSDimitry Andricdef : InstRW<[M85GrpDLat1Mve, M85Read_EX2],
954*5f757f3fSDimitry Andric                (instregex "MVE_VCTP(8|16|32|64)")>;
955*5f757f3fSDimitry Andric
// VCMP and VPT names without the trailing "r": M85GrpCLat1Mve write, no
// explicit reads.  Each pattern ends in "$" so that it stops at the element
// size and does not also cover the "r"-suffixed names, which are given their
// own read lists elsewhere in this file.
// NOTE(review): this relies on instregex otherwise matching as a prefix --
// confirm against TableGen's instregex semantics.
956*5f757f3fSDimitry Andricdef : InstRW<[M85GrpCLat1Mve],
957*5f757f3fSDimitry Andric                (instregex "MVE_VCMPf(16|32)$", "MVE_VCMP[isu](8|16|32)$",
958*5f757f3fSDimitry Andric                           "MVE_VPTv(4|8)f(16|32)$",
959*5f757f3fSDimitry Andric                           "MVE_VPTv(4|8|16)(i|s|u)(8|16|32)$"
960*5f757f3fSDimitry Andric                )>;
961*5f757f3fSDimitry Andric
// VPNOT and VPST: one M85GrpDLat1Mve write, no explicit reads.
962*5f757f3fSDimitry Andricdef : InstRW<[M85GrpDLat1Mve],
963*5f757f3fSDimitry Andric                (instregex "MVE_VPNOT",
964*5f757f3fSDimitry Andric                           "MVE_VPST"
965*5f757f3fSDimitry Andric                )>;
966*5f757f3fSDimitry Andric
// VSHLC: two writes (M85Lat2MveR first, then M85GrpALat2Mve) followed by an
// M85Read_EX1 and an M85Read_EX2 read.
// NOTE(review): the write order presumably follows the instruction's output
// operand order (scalar result first, vector result second) -- confirm
// against the MVE_VSHLC definition.
967*5f757f3fSDimitry Andricdef : InstRW<[M85Lat2MveR, M85GrpALat2Mve, M85Read_EX1, M85Read_EX2],
968*5f757f3fSDimitry Andric                (instregex "MVE_VSHLC")>;
969*5f757f3fSDimitry Andric
970*5f757f3fSDimitry Andric// VFP instructions
971*5f757f3fSDimitry Andric
// Give each of the WriteVLD1..4 and WriteVST1..4 scheduling writes an entry
// in this model with an empty processor-resource list.
// NOTE(review): presumably these exist only so the model has a definition
// for every generic write referenced by the ARM instruction descriptions;
// no latency or resource usage is specified here -- confirm.
972*5f757f3fSDimitry Andricdef : WriteRes<WriteVLD1, []>;
973*5f757f3fSDimitry Andricdef : WriteRes<WriteVLD2, []>;
974*5f757f3fSDimitry Andricdef : WriteRes<WriteVLD3, []>;
975*5f757f3fSDimitry Andricdef : WriteRes<WriteVLD4, []>;
976*5f757f3fSDimitry Andricdef : WriteRes<WriteVST1, []>;
977*5f757f3fSDimitry Andricdef : WriteRes<WriteVST2, []>;
978*5f757f3fSDimitry Andricdef : WriteRes<WriteVST3, []>;
979*5f757f3fSDimitry Andricdef : WriteRes<WriteVST4, []>;
980*5f757f3fSDimitry Andric
981*5f757f3fSDimitry Andric}  // SchedModel = CortexM85Model
982