//=- ARMScheduleM7.td - ARM Cortex-M7 Scheduling Definitions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the SchedRead/Write data for the ARM Cortex-M7 processor.
//
//===----------------------------------------------------------------------===//

// Top-level machine model for the Cortex-M7.
def CortexM7Model : SchedMachineModel {
  // Dual issue for most instructions.
  let IssueWidth = 2;
  // The Cortex-M7 is in-order.
  let MicroOpBufferSize = 0;
  // Best case for load-use case.
  let LoadLatency = 2;
  // Mispredict cost for forward branches is 6, but 4 works better.
  let MispredictPenalty = 4;
  let CompleteModel = 0;
}

let SchedModel = CortexM7Model in {

//===--------------------------------------------------------------------===//
// The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP
// pipe. The stages relevant to scheduling are as follows:
//
//   EX1: address generation  shifts
//   EX2: fast load data      ALUs                 FP operation
//   EX3: slow load data      integer writeback    FP operation
//   EX4: store data                               FP writeback
//
// There are shifters in both EX1 and EX2, and some instructions can be
// flexibly allocated between them.  EX2 is used as the "zero" point
// for scheduling, so simple ALU operations executing in EX2 will have
// ReadAdvance<0> (the default) for their source operands and Latency = 1.

// Every resource except M7UnitALU sets BufferSize = 0; the defs are kept in
// their original order, so the ALU sits between two `let` groups.
let BufferSize = 0 in {
  def M7UnitLoadL : ProcResource<1>;
  def M7UnitLoadH : ProcResource<1>;
  def M7UnitLoad  : ProcResGroup<[M7UnitLoadL,M7UnitLoadH]>;
  def M7UnitStore : ProcResource<1>;
}
def M7UnitALU : ProcResource<2>;
let BufferSize = 0 in {
  def M7UnitShift1 : ProcResource<1>;
  def M7UnitShift2 : ProcResource<1>;
  def M7UnitMAC    : ProcResource<1>;
  def M7UnitBranch : ProcResource<1>;
  def M7UnitVFP    : ProcResource<1>;
  def M7UnitVPortL : ProcResource<1>;
  def M7UnitVPortH : ProcResource<1>;
  def M7UnitVPort  : ProcResGroup<[M7UnitVPortL,M7UnitVPortH]>;
  def M7UnitSIMD   : ProcResource<1>;
}

//===---------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types which map ProcResources and set latency.

// Plain ALU and ALU-with-shift operations all have a latency of 1; the shifted
// forms additionally occupy the EX1 shifter.
def : WriteRes<WriteALU,    [M7UnitALU]>               { let Latency = 1; }
def : WriteRes<WriteALUsi,  [M7UnitALU, M7UnitShift1]> { let Latency = 1; }
def : WriteRes<WriteALUsr,  [M7UnitALU, M7UnitShift1]> { let Latency = 1; }
def : WriteRes<WriteALUSsr, [M7UnitALU, M7UnitShift1]> { let Latency = 1; }

// Compares.
def : WriteRes<WriteCMP, [M7UnitALU]> { let Latency = 1; }
let Latency = 2 in {
  def : WriteRes<WriteCMPsi, [M7UnitALU, M7UnitShift1]>;
  def : WriteRes<WriteCMPsr, [M7UnitALU, M7UnitShift1]>;
}

// Multiplies.
def : WriteRes<WriteMUL16,   [M7UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMUL32,   [M7UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMUL64Lo, [M7UnitMAC]> { let Latency = 2; }
// The high half uses no resource and no micro-op of its own.
def : WriteRes<WriteMUL64Hi, []> {
  let Latency = 2;
  let NumMicroOps = 0;
}

// Multiply-accumulates.
def : WriteRes<WriteMAC16,   [M7UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMAC32,   [M7UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMAC64Lo, [M7UnitMAC]> { let Latency = 2; }
// The high half uses no resource and no micro-op of its own.
def : WriteRes<WriteMAC64Hi, []> {
  let Latency = 2;
  let NumMicroOps = 0;
}

// Divisions.
// These cannot be dual-issued with any instructions.
def : WriteRes<WriteDIV, [M7UnitALU]> { let Latency = 7; let SingleIssue = 1; }

// Loads/Stores.
def : WriteRes<WriteLd, [M7UnitLoad]> { let Latency = 1; }
let Latency = 2 in {
  def : WriteRes<WritePreLd, [M7UnitLoad]>;
  def : WriteRes<WriteST,    [M7UnitStore]>;

  // Branches.
  def : WriteRes<WriteBr,    [M7UnitBranch]>;
  def : WriteRes<WriteBrL,   [M7UnitBranch]>;
  def : WriteRes<WriteBrTbl, [M7UnitBranch]>;
}

// Noop.
def : WriteRes<WriteNoop, []> { let Latency = 0; }

//===---------------------------------------------------------------------===//
// Sched definitions for floating-point instructions
//
// Floating point conversions.
let Latency = 3 in {
  def : WriteRes<WriteFPCVT, [M7UnitVFP, M7UnitVPort]>;
  def : WriteRes<WriteFPMOV, [M7UnitVPort]>;
  def M7WriteFPMOV64 : SchedWriteRes<[M7UnitVPortL, M7UnitVPortH]>;
}

// The FP pipeline has a latency of 3 cycles.
// ALU operations (32/64-bit).  These go down the FP pipeline.
// Throughout this section the 64-bit forms claim both VPort halves and must
// begin an issue group.
def : WriteRes<WriteFPALU32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
  let BeginGroup = 1;
  let Latency = 4;
}

// Multiplication
def : WriteRes<WriteFPMUL32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
  let BeginGroup = 1;
  let Latency = 7;
}

// Multiply-accumulate.  FPMAC goes down the FP Pipeline.
def : WriteRes<WriteFPMAC32, [M7UnitVFP, M7UnitVPort]> { let Latency = 6; }
def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
  let BeginGroup = 1;
  let Latency = 11;
}

// Division.   Effective scheduling latency is 3, though real latency is larger
def : WriteRes<WriteFPDIV32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
  let BeginGroup = 1;
  let Latency = 30;
}

// Square-root.  Effective scheduling latency is 3; real latency is larger
def : WriteRes<WriteFPSQRT32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
  let BeginGroup = 1;
  let Latency = 30;
}

// ALU write that occupies the second (EX2) shifter instead of the first.
def M7WriteShift2 : SchedWriteRes<[M7UnitALU, M7UnitShift2]>;

// Not used for M7, but needing definitions anyway
foreach UnusedWrite = [WriteVLD1, WriteVLD2, WriteVLD3, WriteVLD4,
                       WriteVST1, WriteVST2, WriteVST3, WriteVST4] in
  def : WriteRes<UnusedWrite, []>;

// Zero-micro-op writes that only constrain how an instruction may issue.
let NumMicroOps = 0 in {
  def M7SingleIssue : SchedWriteRes<[]> { let SingleIssue = 1; }
  def M7Slot0Only   : SchedWriteRes<[]> { let BeginGroup = 1; }
}

// What pipeline stage operands need to be ready for depending on
// where they come from.
def : ReadAdvance<ReadALUsr, 0>;
def : ReadAdvance<ReadMUL, 0>;
def : ReadAdvance<ReadMAC, 1>;
def : ReadAdvance<ReadALU, 0>;
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 3>;
// Advances are relative to EX2 (the scheduling "zero" point), so the stage at
// which the operand is needed is the one given in the trailing comment, not
// necessarily the one suggested by the record name.
def M7Read_ISS : SchedReadAdvance<-1>;     // operands needed at EX1
def M7Read_EX2 : SchedReadAdvance<1>;      // operands needed at EX3
def M7Read_EX3 : SchedReadAdvance<2>;      // operands needed at EX4

// Non general purpose instructions may not be dual issued. These
// use both issue units.
def M7NonGeneralPurpose : SchedWriteRes<[]> {
  // Assume that these will go down the main ALU pipeline.
  // In reality, many look likely to stall the whole pipeline.
  let SingleIssue = 1;
  let Latency = 3;
}

// List the non general purpose instructions.
def : InstRW<[M7NonGeneralPurpose],
      (instregex "t2MRS", "tSVC", "tBKPT", "t2MSR", "t2DMB", "t2DSB", "t2ISB",
                 "t2HVC", "t2SMC", "t2UDF", "ERET", "tHINT", "t2HINT",
                 "t2CLREX", "BUNDLE")>;

//===---------------------------------------------------------------------===//
// Sched definitions for load/store
//
// Mark whether the loads/stores must be single-issue
// Address operands are needed earlier
// Data operands are needed later

// Resource-free, zero-micro-op writes used only to attach a latency to an
// extra result operand.
let NumMicroOps = 0 in {
  // Update is bypassable out of EX1
  def M7BaseUpdate   : SchedWriteRes<[]> { let Latency = 0; }
  def M7LoadLatency1 : SchedWriteRes<[]> { let Latency = 1; }
}
def M7SlowLoad : SchedWriteRes<[M7UnitLoad]> { let Latency = 2; }

// Byte and half-word loads should have greater latency than other loads.
// So should load exclusive.

// PC-relative form: no register address operand to advance.
def : InstRW<[M7SlowLoad], (instregex "t2LDR(B|H|SB|SH)pc")>;
// One address register, needed at EX1.
def : InstRW<[M7SlowLoad, M7Read_ISS],
      (instregex "t2LDR(B|H|SB|SH)T",
                 "t2LDR(B|H|SB|SH)i",
                 "tLDR(B|H)i")>;
// Base plus register offset: both needed at EX1.
def : InstRW<[M7SlowLoad, M7Read_ISS, M7Read_ISS],
      (instregex "t2LDR(B|H|SB|SH)s",
                 "tLDR(B|H)r",
                 "tLDR(SB|SH)")>;
// Pre/post-indexed forms also write back the base register.
def : InstRW<[M7SlowLoad, M7BaseUpdate, M7Read_ISS],
      (instregex "t2LDR(B|H|SB|SH)_(POST|PRE)")>;

// Exclusive loads/stores cannot be dual-issued
def : InstRW<[WriteLd, M7Slot0Only, M7Read_ISS], (instregex "t2LDREX$")>;
def : InstRW<[M7SlowLoad, M7Slot0Only, M7Read_ISS],
      (instregex "t2LDREX(B|H)")>;
def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_ISS],
      (instregex "t2STREX(B|H)?$")>;

// Load/store multiples cannot be dual-issued.  Note that default scheduling
// occurs around read/write times of individual registers in the list; read
// time for STM cannot be overridden because it is a variadic source operand.

def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS],
      (instregex "(t|t2)LDM(DB|IA)$")>;
def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS],
      (instregex "(t|t2)STM(DB|IA)$")>;
def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS],
      (instregex "(t|t2)LDM(DB|IA)_UPD$", "tPOP")>;
def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS],
      (instregex "(t|t2)STM(DB|IA)_UPD$", "tPUSH")>;

// Load/store doubles cannot be dual-issued.

def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue,
              M7Read_EX2, M7Read_EX2, M7Read_ISS],
      (instregex "t2STRD_(PRE|POST)")>;
def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_EX2, M7Read_ISS],
      (instregex "t2STRDi")>;
def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7BaseUpdate, M7Read_ISS],
      (instregex "t2LDRD_(PRE|POST)")>;
def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7Read_ISS],
      (instregex "t2LDRDi")>;

// Word load / preload
def : InstRW<[WriteLd],
      (instregex "t2LDRpc", "t2PL[DI]pci", "tLDRpci")>;
def : InstRW<[WriteLd, M7Read_ISS],
      (instregex "t2LDR(i|T)", "t2PL[DI](W)?i", "tLDRi", "tLDRspi")>;
// The optional "W" must be upper-case to match the register-offset PLDW form
// (t2PLDWs), mirroring the immediate-offset pattern above; with a lower-case
// "w" that instruction never matched and kept its default schedule.
def : InstRW<[WriteLd, M7Read_ISS, M7Read_ISS],
      (instregex "t2LDRs", "t2PL[DI](W)?s", "tLDRr")>;
def : InstRW<[WriteLd, M7BaseUpdate, M7Read_ISS],
      (instregex "t2LDR_(POST|PRE)")>;

// Stores
// Store data is needed late (M7Read_EX2); address registers early (M7Read_ISS).
def : InstRW<[M7BaseUpdate, WriteST, M7Read_EX2, M7Read_ISS],
      (instregex "t2STR(B|H)?_(POST|PRE)")>;
def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS, M7Read_ISS],
      (instregex "t2STR(B|H)?s$", "tSTR(B|H)?r$")>;
def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS],
      (instregex "t2STR(B|H)?(i|T)", "tSTR(B|H)?i$", "tSTRspi")>;

// TBB/TBH - single-issue only; takes two cycles to issue

def M7TableLoad : SchedWriteRes<[M7UnitLoad]> {
  let NumMicroOps = 2;
  let SingleIssue = 1;
}

def : InstRW<[M7TableLoad, M7Read_ISS, M7Read_ISS], (instregex "t2TB")>;

// VFP loads and stores

// Single-precision accesses take one load unit / VPort from each group;
// double-precision accesses claim both halves and cannot be dual-issued.
def M7LoadSP  : SchedWriteRes<[M7UnitLoad, M7UnitVPort]> { let Latency = 1; }
def M7LoadDP  : SchedWriteRes<[M7UnitLoadL, M7UnitLoadH, M7UnitVPortL, M7UnitVPortH]> {
  let Latency = 2;
  let SingleIssue = 1;
}
def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>;
def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPortL, M7UnitVPortH]> {
  let SingleIssue = 1;
}

def : InstRW<[M7LoadSP, M7Read_ISS],                 (instregex "VLDR(S|H)$")>;
def : InstRW<[M7LoadDP, M7Read_ISS],                 (instregex "VLDRD$")>;
// VFP stores: data needed at EX4 (M7Read_EX3), address at EX1 (M7Read_ISS).
def : InstRW<[M7StoreSP, M7Read_EX3, M7Read_ISS],
      (instregex "VSTR(S|H)$")>;
def : InstRW<[M7StoreDP, M7Read_EX3, M7Read_ISS],
      (instregex "VSTRD$")>;

// Load/store multiples cannot be dual-issued.

// Without base-register writeback.
def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS],
      (instregex "VLDM(S|D|Q)(DB|IA)$")>;
def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS],
      (instregex "VSTM(S|D|Q)(DB|IA)$")>;
// With base-register writeback.
def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS],
      (instregex "VLDM(S|D|Q)(DB|IA)_UPD$")>;
def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS],
      (instregex "VSTM(S|D|Q)(DB|IA)_UPD$")>;

//===---------------------------------------------------------------------===//
// Sched definitions for ALU
//

// Shifted ALU operands are read a cycle early.
// The -1 advance is restricted to values produced by the listed load writes.
def M7Ex1ReadNoFastBypass : SchedReadAdvance<-1, [WriteLd, M7LoadLatency1]>;

def : InstRW<[WriteALUsi, M7Ex1ReadNoFastBypass, M7Read_ISS],
      (instregex
         "t2(ADC|ADDS|ADD|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|SUBS)rs$",
         "t2(SUB|CMP|CMNz|TEQ|TST)rs$",
         "t2MOVsr(a|l)")>;
def : InstRW<[WriteALUsi, M7Read_ISS], (instregex "t2MVNs")>;

// Treat pure shift operations (except for RRX) as if they used the EX1
// shifter but have timing as if they used the EX2 shifter as they usually
// can choose the EX2 shifter when needed.  Will miss a few dual-issue cases,
// but the results prove to be better than trying to get them exact.

def : InstRW<[M7WriteShift2, M7Read_ISS],
      (instregex "t2RRX$")>;
def : InstRW<[WriteALUsi],
      (instregex "(t|t2)(LSL|LSR|ASR|ROR)")>;

// Instructions that use the shifter, but have normal timing.

def : InstRW<[WriteALUsi,M7Slot0Only],
      (instregex "t2(BFC|BFI)$")>;

// Instructions which are slot zero only but otherwise normal.

def : InstRW<[WriteALU, M7Slot0Only],
      (instregex "t2CLZ")>;

// MAC operations that don't have SchedRW set.

def : InstRW<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC],
      (instregex "t2SML[AS]D")>;

// Divides are special because they stall for their latency, and so look like a
// single-cycle as far as scheduling opportunities go.  By putting WriteALU
// first, we make the operand latency 1, but keep the instruction latency 7.

def : InstRW<[WriteALU, WriteDIV],
      (instregex "t2(S|U)DIV")>;

// DSP extension operations

// All DSP writes must begin an issue group.  The "Sh" variants additionally
// occupy the EX1 shifter; the numeric suffix is the result latency.
let BeginGroup = 1 in {
  def M7WriteSIMD1   : SchedWriteRes<[M7UnitSIMD, M7UnitALU]> {
    let Latency = 1;
  }
  def M7WriteSIMD2   : SchedWriteRes<[M7UnitSIMD, M7UnitALU]> {
    let Latency = 2;
  }
  def M7WriteShSIMD1 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> {
    let Latency = 1;
  }
  def M7WriteShSIMD0 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> {
    let Latency = 0;      // Bypassable out of EX1
  }
  def M7WriteShSIMD2 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> {
    let Latency = 2;
  }
}

def : InstRW<[M7WriteShSIMD2, M7Read_ISS], (instregex "t2(S|U)SAT")>;
def : InstRW<[M7WriteSIMD1, ReadALU], (instregex "(t|t2)(S|U)XT(B|H)")>;
def : InstRW<[M7WriteSIMD1, ReadALU, ReadALU],
      (instregex
         "t2(S|SH|U|UH)(ADD16|ADD8|ASX|SAX|SUB16|SUB8)",
         "t2SEL")>;
def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU],
      (instregex
         "t2(Q|UQ)(ADD|ASX|SAX|SUB)",
         "t2USAD8")>;
def : InstRW<[M7WriteShSIMD2, M7Read_ISS, M7Read_ISS],
      (instregex "t2QD(ADD|SUB)")>;
def : InstRW<[M7WriteShSIMD0, M7Read_ISS],
      (instregex
         "t2(RBIT|REV)",
         "tREV")>;
def : InstRW<[M7WriteShSIMD1, M7Read_ISS], (instregex "t2(SBFX|UBFX)")>;
def : InstRW<[M7WriteShSIMD1, ReadALU, M7Read_ISS],
      (instregex
         "t2PKH(BT|TB)",
         "t2(S|U)XTA")>;
def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU, M7Read_EX2],
      (instregex "t2USADA8")>;

// MSR/MRS
def : InstRW<[M7NonGeneralPurpose],
      (instregex "MSR", "MRS")>;

//===---------------------------------------------------------------------===//
// Sched definitions for FP operations
//

// Effective scheduling latency is really 3 for nearly all FP operations,
// even if their true latency is higher.
// Zero-micro-op helper writes with latency 3.  The "ExtraVPort" flavour also
// occupies one more M7UnitVPort.
let Latency = 3, NumMicroOps = 0 in {
  def M7WriteVFPLatOverride : SchedWriteRes<[]>;
  def M7WriteVFPExtraVPort  : SchedWriteRes<[M7UnitVPort]>;
}

// Instructions which are missing default schedules.
def : InstRW<[WriteFPALU32],
      (instregex
         "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)S$")>;
def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64],
      (instregex
         "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)D$")>;

// VCMP
// NOTE(review): M7WriteVCMPD lists the M7UnitVPort group twice, whereas the
// other double-precision writes in this file name M7UnitVPortL and
// M7UnitVPortH explicitly -- confirm whether this difference is intentional.
let Latency = 0 in {
  def M7WriteVCMPS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]>;
  def M7WriteVCMPD : SchedWriteRes<[M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
    let BeginGroup = 1;
  }
}
def : InstRW<[M7WriteVCMPS], (instregex "VCMPS$")>;
def : InstRW<[M7WriteVCMPD], (instregex "VCMPD$")>;

// VMRS/VMSR
let SingleIssue = 1 in {
  def M7VMRS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]>;
  def M7VMSR : SchedWriteRes<[M7UnitVFP, M7UnitVPort]>;
}
def : InstRW<[M7VMRS], (instregex "FMSTAT")>;
def : InstRW<[M7VMSR], (instregex "VMSR")>;

// VSEL cannot bypass in its implied $cpsr operand; model as earlier read
def : InstRW<[WriteFPALU32, M7Slot0Only,
              ReadALU, ReadALU, M7Read_ISS],
      (instregex "VSEL.*S$")>;
def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64, M7Slot0Only,
              ReadALU, ReadALU, M7Read_ISS],
      (instregex "VSEL.*D$")>;

// VMOV
def : InstRW<[WriteFPMOV],
      (instregex
         "VMOV(H|S)$",
         "FCONST(H|S)")>;
// Double-precision moves and constants take an extra VPort and are restricted
// to slot 0.
def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only],
      (instregex "VMOVD$")>;
def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only],
      (instregex "FCONSTD")>;
// The DRR/RRD/RRS/SRR moves cannot be dual-issued.
def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7SingleIssue],
      (instregex "VMOV(DRR|RRD|RRS|SRR)")>;

// Larger-latency overrides.

def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV32],
      (instregex "VDIVS")>;
def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV64],
      (instregex "VDIVD")>;
def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT32],
      (instregex "VSQRTS")>;
def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT64],
      (instregex "VSQRTD")>;
def : InstRW<[M7WriteVFPLatOverride, WriteFPMUL64],
      (instregex "V(MUL|NMUL)D")>;
def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64],
      (instregex "V(ADD|SUB)D")>;

// Multiply-accumulate.  Chained SP timing is correct; rest need overrides
// Double-precision chained MAC stalls the pipeline behind it for 3 cycles,
// making it appear to have 3 cycle latency for scheduling.

def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64, ReadFPMAC,
              ReadFPMUL, ReadFPMUL],
      (instregex "V(N)?ML(A|S)D$")>;

// Single-precision fused MACs look like latency 5 with advance of 2.

def M7WriteVFPLatOverride5 : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 5;
}
def M7ReadFPMAC2 : SchedReadAdvance<2>;

def : InstRW<[M7WriteVFPLatOverride5, WriteFPMAC32, M7ReadFPMAC2,
              ReadFPMUL, ReadFPMUL],
      (instregex "VF(N)?M(A|S)S$")>;

// Double-precision fused MAC stalls the pipeline behind it for 2 cycles, making
// it appear to have 3 cycle latency for scheduling.

def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64, ReadFPMAC,
              ReadFPMUL, ReadFPMUL],
      (instregex "VF(N)?M(A|S)D$")>;

} // SchedModel = CortexM7Model