xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM7.td (revision fe6060f10f634930ff71b7c50291ddc610da2475)
//=- ARMScheduleM7.td - ARM Cortex-M7 Scheduling Definitions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the SchedRead/Write data for the ARM Cortex-M7 processor.
//
//===----------------------------------------------------------------------===//
12e8d8bef9SDimitry Andric
// Top-level machine model record for the Cortex-M7.
def CortexM7Model : SchedMachineModel {
  let CompleteModel     = 0;
  let MicroOpBufferSize = 0; // The Cortex-M7 is in-order.
  let IssueWidth        = 2; // Dual issue for most instructions.
  let LoadLatency       = 2; // Best case for load-use case.
  // The mispredict cost for forward branches is really 6, but 4 works better.
  let MispredictPenalty = 4;
}
21e8d8bef9SDimitry Andric
22*fe6060f1SDimitry Andriclet SchedModel = CortexM7Model in {
23*fe6060f1SDimitry Andric
//===--------------------------------------------------------------------===//
// The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP
// pipe. The stages relevant to scheduling are as follows:
//
//   EX1: address generation  shifts
//   EX2: fast load data      ALUs                  FP operation
//   EX3: slow load data      integer writeback     FP operation
//   EX4: store data                                FP writeback
//
// There are shifters in both EX1 and EX2, and some instructions can be
// flexibly allocated between them.  EX2 is used as the "zero" point
// for scheduling, so simple ALU operations executing in EX2 will have
// ReadAdvance<0> (the default) for their source operands and Latency = 1.
37e8d8bef9SDimitry Andric
// Processor resources.  Every resource except M7UnitALU sets BufferSize = 0;
// M7UnitALU has two units and keeps the default BufferSize.  The two load
// units and the two VFP port halves are also reachable through the groups
// M7UnitLoad and M7UnitVPort.
let BufferSize = 0 in {
  def M7UnitLoadL  : ProcResource<1>;
  def M7UnitLoadH  : ProcResource<1>;
  def M7UnitLoad   : ProcResGroup<[M7UnitLoadL, M7UnitLoadH]>;
  def M7UnitStore  : ProcResource<1>;
}

def M7UnitALU      : ProcResource<2>;

let BufferSize = 0 in {
  def M7UnitShift1 : ProcResource<1>;
  def M7UnitShift2 : ProcResource<1>;
  def M7UnitMAC    : ProcResource<1>;
  def M7UnitBranch : ProcResource<1>;
  def M7UnitVFP    : ProcResource<1>;
  def M7UnitVPortL : ProcResource<1>;
  def M7UnitVPortH : ProcResource<1>;
  def M7UnitVPort  : ProcResGroup<[M7UnitVPortL, M7UnitVPortH]>;
  def M7UnitSIMD   : ProcResource<1>;
}
52e8d8bef9SDimitry Andric
//===---------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types which map ProcResources and set latency.

// Basic ALU operations, with and without a shifted operand, all have a
// latency of one cycle.  The shifted forms additionally use M7UnitShift1.
let Latency = 1 in {
  def : WriteRes<WriteALU, [M7UnitALU]>;

  foreach ShiftedWrite = [WriteALUsi, WriteALUsr, WriteALUSsr] in
    def : WriteRes<ShiftedWrite, [M7UnitALU, M7UnitShift1]>;
}
64e8d8bef9SDimitry Andric
// Compares.  The plain compare has latency 1 on the ALU; the shifted forms
// also use M7UnitShift1 and have latency 2.
let Latency = 1 in
  def : WriteRes<WriteCMP, [M7UnitALU]>;

let Latency = 2 in {
  foreach ShiftedCmp = [WriteCMPsi, WriteCMPsr] in
    def : WriteRes<ShiftedCmp, [M7UnitALU, M7UnitShift1]>;
}
69e8d8bef9SDimitry Andric
// Multiplies: latency 2 on the MAC unit.  WriteMUL64Hi consumes no resources
// and no micro-ops of its own.
def : WriteRes<WriteMUL16,   [M7UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMUL32,   [M7UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMUL64Lo, [M7UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMUL64Hi, []> {
  let Latency     = 2;
  let NumMicroOps = 0;
}
77e8d8bef9SDimitry Andric
// Multiply-accumulates: latency 2 on the MAC unit.  WriteMAC64Hi consumes no
// resources and no micro-ops of its own.
let Latency = 2 in {
  foreach MacWrite = [WriteMAC16, WriteMAC32, WriteMAC64Lo] in
    def : WriteRes<MacWrite, [M7UnitMAC]>;

  let NumMicroOps = 0 in
    def : WriteRes<WriteMAC64Hi, []>;
}
85e8d8bef9SDimitry Andric
// Divisions.
// These cannot be dual-issued with any instructions, hence SingleIssue.
let Latency = 7, SingleIssue = 1 in
  def : WriteRes<WriteDIV, [M7UnitALU]>;
92e8d8bef9SDimitry Andric
// Loads/Stores.
let Latency = 1 in def : WriteRes<WriteLd,    [M7UnitLoad]>;
let Latency = 2 in def : WriteRes<WritePreLd, [M7UnitLoad]>;
let Latency = 2 in def : WriteRes<WriteST,    [M7UnitStore]>;

// Branches.  All three branch writes use the branch unit with latency 2.
let Latency = 2 in {
  foreach BranchWrite = [WriteBr, WriteBrL, WriteBrTbl] in
    def : WriteRes<BranchWrite, [M7UnitBranch]>;
}

// Noop.
let Latency = 0 in def : WriteRes<WriteNoop, []>;
105e8d8bef9SDimitry Andric
//===---------------------------------------------------------------------===//
// Sched definitions for floating-point instructions
//
// Floating point conversions and moves all have a latency of 3.
// M7WriteFPMOV64 names both halves of the VPort rather than the group.
let Latency = 3 in {
  def : WriteRes<WriteFPCVT, [M7UnitVFP, M7UnitVPort]>;
  def : WriteRes<WriteFPMOV, [M7UnitVPort]>;
  def M7WriteFPMOV64 : SchedWriteRes<[M7UnitVPortL, M7UnitVPortH]>;
}
115e8d8bef9SDimitry Andric
// The FP pipeline has a latency of 3 cycles.
//
// In each pair below the 32-bit write uses M7UnitVFP and the M7UnitVPort
// group, while the 64-bit write names both VPort halves and sets BeginGroup.

// ALU operations (32/64-bit).  These go down the FP pipeline.
let Latency = 3 in
  def : WriteRes<WriteFPALU32, [M7UnitVFP, M7UnitVPort]>;
let Latency = 4, BeginGroup = 1 in
  def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]>;

// Multiplication
let Latency = 3 in
  def : WriteRes<WriteFPMUL32, [M7UnitVFP, M7UnitVPort]>;
let Latency = 7, BeginGroup = 1 in
  def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]>;

// Multiply-accumulate.  FPMAC goes down the FP Pipeline.
let Latency = 6 in
  def : WriteRes<WriteFPMAC32, [M7UnitVFP, M7UnitVPort]>;
let Latency = 11, BeginGroup = 1 in
  def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]>;

// Division.   Effective scheduling latency is 3, though real latency is larger
let Latency = 16 in
  def : WriteRes<WriteFPDIV32, [M7UnitVFP, M7UnitVPort]>;
let Latency = 30, BeginGroup = 1 in
  def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]>;

// Square-root.  Effective scheduling latency is 3; real latency is larger
let Latency = 16 in
  def : WriteRes<WriteFPSQRT32, [M7UnitVFP, M7UnitVPort]>;
let Latency = 30, BeginGroup = 1 in
  def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]>;
151e8d8bef9SDimitry Andric
// ALU operation that uses the second shifter; all other fields are defaults.
def M7WriteShift2 : SchedWriteRes<[M7UnitALU, M7UnitShift2]>;

// Not used for M7, but needing definitions anyway
foreach UnusedWrite = [WriteVLD1, WriteVLD2, WriteVLD3, WriteVLD4,
                       WriteVST1, WriteVST2, WriteVST3, WriteVST4] in
  def : WriteRes<UnusedWrite, []>;

// Resource-free, zero-micro-op writes that only add an issue constraint.
let NumMicroOps = 0 in {
  def M7SingleIssue : SchedWriteRes<[]> { let SingleIssue = 1; }
  def M7Slot0Only   : SchedWriteRes<[]> { let BeginGroup = 1; }
}
172e8d8bef9SDimitry Andric
// The pipeline stage at which an operand must be ready depends on where
// the operand comes from.
def : ReadAdvance<ReadALUsr, 0>;
def : ReadAdvance<ReadMUL,   0>;
def : ReadAdvance<ReadMAC,   1>;
def : ReadAdvance<ReadALU,   0>;
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 3>;

// NOTE(review): each name below is one stage earlier than the stage its
// comment mentions (ISS/EX1, EX2/EX3, EX3/EX4) -- confirm which is intended.
def M7Read_ISS : SchedReadAdvance<-1>; // operands needed at EX1
def M7Read_EX2 : SchedReadAdvance<1>;  // operands needed at EX3
def M7Read_EX3 : SchedReadAdvance<2>;  // operands needed at EX4
184e8d8bef9SDimitry Andric
// Non general purpose instructions may not be dual issued. These
// use both issue units.
//
// They are assumed to go down the main ALU pipeline; in reality, many look
// likely to stall the whole pipeline.
let Latency = 3, SingleIssue = 1 in
  def M7NonGeneralPurpose : SchedWriteRes<[]>;

// List the non general purpose instructions.
def : InstRW<[M7NonGeneralPurpose],
             (instregex "t2MRS", "tSVC", "tBKPT", "t2MSR", "t2DMB",
                        "t2DSB", "t2ISB", "t2HVC", "t2SMC", "t2UDF",
                        "ERET", "tHINT", "t2HINT", "t2CLREX", "BUNDLE")>;
199e8d8bef9SDimitry Andric
//===---------------------------------------------------------------------===//
// Sched definitions for load/store
//
// Mark whether the loads/stores must be single-issue
// Address operands are needed earlier
// Data operands are needed later

// Resource-free helper writes that contribute no micro-ops of their own.
let NumMicroOps = 0 in {
  // Update is bypassable out of EX1
  def M7BaseUpdate   : SchedWriteRes<[]> { let Latency = 0; }
  def M7LoadLatency1 : SchedWriteRes<[]> { let Latency = 1; }
}

// Load with a latency of two cycles on the load group.
let Latency = 2 in
  def M7SlowLoad : SchedWriteRes<[M7UnitLoad]>;
216e8d8bef9SDimitry Andric
// Byte and half-word loads should have greater latency than other loads.
// So should load exclusive.

// PC-relative forms: no register read override.
def : InstRW<[M7SlowLoad], (instregex "t2LDR(B|H|SB|SH)pc")>;

// One operand read with M7Read_ISS.
def : InstRW<[M7SlowLoad, M7Read_ISS],
             (instregex "t2LDR(B|H|SB|SH)T",
                        "t2LDR(B|H|SB|SH)i",
                        "tLDR(B|H)i")>;

// Two operands read with M7Read_ISS.
def : InstRW<[M7SlowLoad, M7Read_ISS, M7Read_ISS],
             (instregex "t2LDR(B|H|SB|SH)s",
                        "tLDR(B|H)r",
                        "tLDR(SB|SH)")>;

// Pre/post-indexed forms also write the base through M7BaseUpdate.
def : InstRW<[M7SlowLoad, M7BaseUpdate, M7Read_ISS],
             (instregex "t2LDR(B|H|SB|SH)_(POST|PRE)")>;
229e8d8bef9SDimitry Andric
// Exclusive loads/stores cannot be dual-issued
// The loads are tagged with M7Slot0Only and the stores with M7SingleIssue.
def : InstRW<[WriteLd, M7Slot0Only, M7Read_ISS],
             (instregex "t2LDREX$")>;

def : InstRW<[M7SlowLoad, M7Slot0Only, M7Read_ISS],
             (instregex "t2LDREX(B|H)")>;

def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_ISS],
             (instregex "t2STREX(B|H)?$")>;
237e8d8bef9SDimitry Andric
// Load/store multiples cannot be dual-issued.  Note that default scheduling
// occurs around read/write times of individual registers in the list; read
// time for STM cannot be overridden because it is a variadic source operand.

// Forms without base writeback.
def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS],
             (instregex "(t|t2)LDM(DB|IA)$")>;
def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS],
             (instregex "(t|t2)STM(DB|IA)$")>;

// Forms with base writeback: M7BaseUpdate comes first in the write list.
def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS],
             (instregex "(t|t2)LDM(DB|IA)_UPD$", "tPOP")>;
def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS],
             (instregex "(t|t2)STM(DB|IA)_UPD$", "tPUSH")>;
250e8d8bef9SDimitry Andric
// Load/store doubles cannot be dual-issued.

// Store doubles: two reads use M7Read_EX2 and one uses M7Read_ISS.
def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue,
              M7Read_EX2, M7Read_EX2, M7Read_ISS],
             (instregex "t2STRD_(PRE|POST)")>;
def : InstRW<[WriteST, M7SingleIssue,
              M7Read_EX2, M7Read_EX2, M7Read_ISS],
             (instregex "t2STRDi")>;

// Load doubles: the first result uses WriteLd, the second M7LoadLatency1.
def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue,
              M7BaseUpdate, M7Read_ISS],
             (instregex "t2LDRD_(PRE|POST)")>;
def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7Read_ISS],
             (instregex "t2LDRDi")>;
262e8d8bef9SDimitry Andric
// Word load / preload

// PC-relative forms: no register read override.
def : InstRW<[WriteLd],
      (instregex "t2LDRpc", "t2PL[DI]pci", "tLDRpci")>;

// One operand read with M7Read_ISS.
def : InstRW<[WriteLd, M7Read_ISS],
      (instregex "t2LDR(i|T)", "t2PL[DI](W)?i", "tLDRi", "tLDRspi")>;

// Two operands read with M7Read_ISS.  The optional W in the preload
// pattern is upper-case, as in the pattern above: instruction names are
// matched case-sensitively, so a lower-case "w" can never match.
def : InstRW<[WriteLd, M7Read_ISS, M7Read_ISS],
      (instregex "t2LDRs", "t2PL[DI](W)?s", "tLDRr")>;

// Pre/post-indexed forms also write the base through M7BaseUpdate.
def : InstRW<[WriteLd, M7BaseUpdate, M7Read_ISS],
      (instregex "t2LDR_(POST|PRE)")>;
272e8d8bef9SDimitry Andric
// Stores
// In every form the first read uses M7Read_EX2 and the remaining reads use
// M7Read_ISS.

// Pre/post-indexed: M7BaseUpdate comes first in the write list.
def : InstRW<[M7BaseUpdate, WriteST, M7Read_EX2, M7Read_ISS],
             (instregex "t2STR(B|H)?_(POST|PRE)")>;

// Two M7Read_ISS reads.
def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS, M7Read_ISS],
             (instregex "t2STR(B|H)?s$", "tSTR(B|H)?r$")>;

// One M7Read_ISS read.
def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS],
             (instregex "t2STR(B|H)?(i|T)", "tSTR(B|H)?i$", "tSTRspi")>;
280e8d8bef9SDimitry Andric
// TBB/TBH - single-issue only; takes two cycles to issue

let NumMicroOps = 2, SingleIssue = 1 in
  def M7TableLoad : SchedWriteRes<[M7UnitLoad]>;

def : InstRW<[M7TableLoad, M7Read_ISS, M7Read_ISS],
             (instregex "t2TB")>;
289e8d8bef9SDimitry Andric
// VFP loads and stores
//
// The SP writes use the load/store resource plus the VPort group.  The DP
// writes name both VPort halves (and, for loads, both load units) and are
// single-issue.

let Latency = 1 in
  def M7LoadSP : SchedWriteRes<[M7UnitLoad, M7UnitVPort]>;

let Latency = 2, SingleIssue = 1 in
  def M7LoadDP : SchedWriteRes<[M7UnitLoadL, M7UnitLoadH,
                                M7UnitVPortL, M7UnitVPortH]>;

def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>;

let SingleIssue = 1 in
  def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPortL, M7UnitVPortH]>;

// Loads read one operand with M7Read_ISS.
def : InstRW<[M7LoadSP, M7Read_ISS], (instregex "VLDR(S|H)$")>;
def : InstRW<[M7LoadDP, M7Read_ISS], (instregex "VLDRD$")>;

// Stores read the first operand with M7Read_EX3, the second with M7Read_ISS.
def : InstRW<[M7StoreSP, M7Read_EX3, M7Read_ISS], (instregex "VSTR(S|H)$")>;
def : InstRW<[M7StoreDP, M7Read_EX3, M7Read_ISS], (instregex "VSTRD$")>;
306e8d8bef9SDimitry Andric
// Load/store multiples cannot be dual-issued.

// Forms without base writeback.
def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS],
             (instregex "VLDM(S|D|Q)(DB|IA)$")>;
def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS],
             (instregex "VSTM(S|D|Q)(DB|IA)$")>;

// Forms with base writeback: M7BaseUpdate comes first in the write list.
def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS],
             (instregex "VLDM(S|D|Q)(DB|IA)_UPD$")>;
def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS],
             (instregex "VSTM(S|D|Q)(DB|IA)_UPD$")>;
317e8d8bef9SDimitry Andric
//===---------------------------------------------------------------------===//
// Sched definitions for ALU
//

// Shifted ALU operands are read a cycle early.
// This advance of -1 is restricted to values produced by WriteLd and
// M7LoadLatency1.
def M7Ex1ReadNoFastBypass
    : SchedReadAdvance<-1, [WriteLd, M7LoadLatency1]>;

def : InstRW<[WriteALUsi, M7Ex1ReadNoFastBypass, M7Read_ISS],
             (instregex
                "t2(ADC|ADDS|ADD|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|SUBS)rs$",
                "t2(SUB|CMP|CMNz|TEQ|TST)rs$",
                "t2MOVsr(a|l)")>;

def : InstRW<[WriteALUsi, M7Read_ISS], (instregex "t2MVNs")>;
331e8d8bef9SDimitry Andric
// Treat pure shift operations (except for RRX) as if they used the EX1
// shifter but have timing as if they used the EX2 shifter as they usually
// can choose the EX2 shifter when needed.  Will miss a few dual-issue cases,
// but the results prove to be better than trying to get them exact.

def : InstRW<[M7WriteShift2, M7Read_ISS],
             (instregex "t2RRX$")>;
def : InstRW<[WriteALUsi],
             (instregex "(t|t2)(LSL|LSR|ASR|ROR)")>;

// Instructions that use the shifter, but have normal timing.

def : InstRW<[WriteALUsi, M7Slot0Only],
             (instregex "t2(BFC|BFI)$")>;

// Instructions which are slot zero only but otherwise normal.

def : InstRW<[WriteALU, M7Slot0Only],
             (instregex "t2CLZ")>;
347e8d8bef9SDimitry Andric
// MAC operations that don't have SchedRW set.

def : InstRW<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC],
             (instregex "t2SML[AS]D")>;

// Divides are special because they stall for their latency, and so look like a
// single-cycle as far as scheduling opportunities go.  By putting WriteALU
// first, we make the operand latency 1, but keep the instruction latency 7.

def : InstRW<[WriteALU, WriteDIV],
             (instregex "t2(S|U)DIV")>;
357e8d8bef9SDimitry Andric
// DSP extension operations
//
// All of these writes set BeginGroup.  The M7WriteSIMD<n> writes use the SIMD
// and ALU units; the M7WriteShSIMD<n> writes also use M7UnitShift1.  The
// numeric suffix is the latency.

let BeginGroup = 1 in {
  let Latency = 1 in
    def M7WriteSIMD1   : SchedWriteRes<[M7UnitSIMD, M7UnitALU]>;
  let Latency = 2 in
    def M7WriteSIMD2   : SchedWriteRes<[M7UnitSIMD, M7UnitALU]>;

  let Latency = 1 in
    def M7WriteShSIMD1 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]>;
  let Latency = 0 in // Bypassable out of EX1
    def M7WriteShSIMD0 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]>;
  let Latency = 2 in
    def M7WriteShSIMD2 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]>;
}
380e8d8bef9SDimitry Andric
// Instruction mappings for the DSP extension writes.
def : InstRW<[M7WriteShSIMD2, M7Read_ISS],
      (instregex "t2(S|U)SAT")>;

def : InstRW<[M7WriteSIMD1, ReadALU],
      (instregex "(t|t2)(S|U)XT(B|H)")>;

def : InstRW<[M7WriteSIMD1, ReadALU, ReadALU],
      (instregex "t2(S|SH|U|UH)(ADD16|ADD8|ASX|SAX|SUB16|SUB8)",
                 "t2SEL")>;

def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU],
      (instregex "t2(Q|UQ)(ADD|ASX|SAX|SUB)",
                 "t2USAD8")>;

def : InstRW<[M7WriteShSIMD2, M7Read_ISS, M7Read_ISS],
      (instregex "t2QD(ADD|SUB)")>;

def : InstRW<[M7WriteShSIMD0, M7Read_ISS],
      (instregex "t2(RBIT|REV)",
                 "tREV")>;

def : InstRW<[M7WriteShSIMD1, M7Read_ISS],
      (instregex "t2(SBFX|UBFX)")>;

def : InstRW<[M7WriteShSIMD1, ReadALU, M7Read_ISS],
      (instregex "t2PKH(BT|TB)",
                 "t2(S|U)XTA")>;

def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU, M7Read_EX2],
      (instregex "t2USADA8")>;
400e8d8bef9SDimitry Andric
// MSR/MRS
// These are scheduled with M7NonGeneralPurpose.
def : InstRW<[M7NonGeneralPurpose],
             (instregex "MSR", "MRS")>;
403e8d8bef9SDimitry Andric
//===---------------------------------------------------------------------===//
// Sched definitions for FP operations
//

// Effective scheduling latency is really 3 for nearly all FP operations,
// even if their true latency is higher.
//
// Both helper writes have latency 3 and no micro-ops of their own.
// M7WriteVFPExtraVPort additionally uses the M7UnitVPort group.
let Latency = 3, NumMicroOps = 0 in {
  def M7WriteVFPLatOverride : SchedWriteRes<[]>;
  def M7WriteVFPExtraVPort  : SchedWriteRes<[M7UnitVPort]>;
}
418e8d8bef9SDimitry Andric
// Instructions which are missing default schedules.
// The D forms list M7WriteVFPLatOverride ahead of WriteFPALU64.
def : InstRW<[WriteFPALU32],
      (instregex "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)S$")>;

def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64],
      (instregex "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)D$")>;
424e8d8bef9SDimitry Andric
// VCMP
// Both compare writes have latency 0.  The double-precision write names both
// VPort halves, as the other 64-bit FP writes in this file do, and sets
// BeginGroup.
def M7WriteVCMPS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let Latency = 0; }
def M7WriteVCMPD : SchedWriteRes<[M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
  let Latency = 0;
  let BeginGroup = 1;
}
def : InstRW<[M7WriteVCMPS], (instregex "VCMPS$")>;
def : InstRW<[M7WriteVCMPD], (instregex "VCMPD$")>;
433e8d8bef9SDimitry Andric
// VMRS/VMSR
// Both writes use the VFP unit and the VPort group and are single-issue.
let SingleIssue = 1 in {
  def M7VMRS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]>;
  def M7VMSR : SchedWriteRes<[M7UnitVFP, M7UnitVPort]>;
}

def : InstRW<[M7VMRS], (instregex "FMSTAT")>;
def : InstRW<[M7VMSR], (instregex "VMSR")>;
439e8d8bef9SDimitry Andric
// VSEL cannot bypass in its implied $cpsr operand; model as earlier read
// The earlier read is the trailing M7Read_ISS in each list.
def : InstRW<[WriteFPALU32, M7Slot0Only,
              ReadALU, ReadALU, M7Read_ISS],
      (instregex "VSEL.*S$")>;

def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64, M7Slot0Only,
              ReadALU, ReadALU, M7Read_ISS],
      (instregex "VSEL.*D$")>;
446e8d8bef9SDimitry Andric
// VMOV

// Half/single-precision moves and constants.
def : InstRW<[WriteFPMOV],
      (instregex "VMOV(H|S)$", "FCONST(H|S)")>;

// Double-precision moves and constants add M7WriteVFPExtraVPort and are
// restricted with M7Slot0Only.
def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only],
      (instregex "VMOVD$", "FCONSTD")>;

// These also add M7WriteVFPExtraVPort, and are single-issue.
def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7SingleIssue],
      (instregex "VMOV(DRR|RRD|RRS|SRR)")>;
456e8d8bef9SDimitry Andric
// Larger-latency overrides.
// Each entry lists M7WriteVFPLatOverride ahead of the generic FP write.

// Divide and square root.
def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV32],
      (instregex "VDIVS")>;
def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV64],
      (instregex "VDIVD")>;
def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT32],
      (instregex "VSQRTS")>;
def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT64],
      (instregex "VSQRTD")>;

// Double-precision multiply, add and subtract.
def : InstRW<[M7WriteVFPLatOverride, WriteFPMUL64],
      (instregex "V(MUL|NMUL)D")>;
def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64],
      (instregex "V(ADD|SUB)D")>;
467e8d8bef9SDimitry Andric
// Multiply-accumulate.  Chained SP timing is correct; rest need overrides
// Double-precision chained MAC stalls the pipeline behind it for 3 cycles,
// making it appear to have 3 cycle latency for scheduling.

def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64,
              ReadFPMAC, ReadFPMUL, ReadFPMUL],
      (instregex "V(N)?ML(A|S)D$")>;

// Single-precision fused MACs look like latency 5 with advance of 2.

let Latency = 5, NumMicroOps = 0 in
  def M7WriteVFPLatOverride5 : SchedWriteRes<[]>;

def M7ReadFPMAC2 : SchedReadAdvance<2>;

def : InstRW<[M7WriteVFPLatOverride5, WriteFPMAC32,
              M7ReadFPMAC2, ReadFPMUL, ReadFPMUL],
      (instregex "VF(N)?M(A|S)S$")>;

// Double-precision fused MAC stalls the pipeline behind it for 2 cycles, making
// it appear to have 3 cycle latency for scheduling.

def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64,
              ReadFPMAC, ReadFPMUL, ReadFPMUL],
      (instregex "VF(N)?M(A|S)D$")>;
494e8d8bef9SDimitry Andric
495e8d8bef9SDimitry Andric}  // SchedModel = CortexM7Model
496