1*21f7c626SNashe Mncube //===- ARMLatencyMutations.cpp - ARM Latency Mutations --------------------===// 2*21f7c626SNashe Mncube // 3*21f7c626SNashe Mncube // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*21f7c626SNashe Mncube // See https://llvm.org/LICENSE.txt for license information. 5*21f7c626SNashe Mncube // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*21f7c626SNashe Mncube // 7*21f7c626SNashe Mncube //===----------------------------------------------------------------------===// 8*21f7c626SNashe Mncube // 9*21f7c626SNashe Mncube /// \file This file contains the ARM definition DAG scheduling mutations which 10*21f7c626SNashe Mncube /// change inter-instruction latencies 11*21f7c626SNashe Mncube // 12*21f7c626SNashe Mncube //===----------------------------------------------------------------------===// 13*21f7c626SNashe Mncube 14*21f7c626SNashe Mncube #include "ARMLatencyMutations.h" 15*21f7c626SNashe Mncube #include "ARMSubtarget.h" 16*21f7c626SNashe Mncube #include "Thumb2InstrInfo.h" 17*21f7c626SNashe Mncube #include "llvm/Analysis/AliasAnalysis.h" 18*21f7c626SNashe Mncube #include "llvm/CodeGen/ScheduleDAG.h" 19*21f7c626SNashe Mncube #include "llvm/CodeGen/ScheduleDAGMutation.h" 20*21f7c626SNashe Mncube #include "llvm/CodeGen/TargetInstrInfo.h" 21*21f7c626SNashe Mncube #include <algorithm> 22*21f7c626SNashe Mncube #include <array> 23*21f7c626SNashe Mncube #include <initializer_list> 24*21f7c626SNashe Mncube #include <memory> 25*21f7c626SNashe Mncube 26*21f7c626SNashe Mncube namespace llvm { 27*21f7c626SNashe Mncube 28*21f7c626SNashe Mncube namespace { 29*21f7c626SNashe Mncube 30*21f7c626SNashe Mncube // Precompute information about opcodes to speed up pass 31*21f7c626SNashe Mncube 32*21f7c626SNashe Mncube class InstructionInformation { 33*21f7c626SNashe Mncube protected: 34*21f7c626SNashe Mncube struct IInfo { 35*21f7c626SNashe Mncube bool HasBRegAddr : 1; // B-side of addr gen is a register 36*21f7c626SNashe 
Mncube bool HasBRegAddrShift : 1; // B-side of addr gen has a shift 37*21f7c626SNashe Mncube bool IsDivide : 1; // Some form of integer divide 38*21f7c626SNashe Mncube bool IsInlineShiftALU : 1; // Inline shift+ALU 39*21f7c626SNashe Mncube bool IsMultiply : 1; // Some form of integer multiply 40*21f7c626SNashe Mncube bool IsMVEIntMAC : 1; // MVE 8/16/32-bit integer MAC operation 41*21f7c626SNashe Mncube bool IsNonSubwordLoad : 1; // Load which is a word or larger 42*21f7c626SNashe Mncube bool IsShift : 1; // Shift operation 43*21f7c626SNashe Mncube bool IsRev : 1; // REV operation 44*21f7c626SNashe Mncube bool ProducesQP : 1; // Produces a vector register result 45*21f7c626SNashe Mncube bool ProducesDP : 1; // Produces a double-precision register result 46*21f7c626SNashe Mncube bool ProducesSP : 1; // Produces a single-precision register result 47*21f7c626SNashe Mncube bool ConsumesQP : 1; // Consumes a vector register result 48*21f7c626SNashe Mncube bool ConsumesDP : 1; // Consumes a double-precision register result 49*21f7c626SNashe Mncube bool ConsumesSP : 1; // Consumes a single-precision register result 50*21f7c626SNashe Mncube unsigned MVEIntMACMatched; // Matched operand type (for MVE) 51*21f7c626SNashe Mncube unsigned AddressOpMask; // Mask indicating which operands go into AGU 52*21f7c626SNashe Mncube IInfo() 53*21f7c626SNashe Mncube : HasBRegAddr(false), HasBRegAddrShift(false), IsDivide(false), 54*21f7c626SNashe Mncube IsInlineShiftALU(false), IsMultiply(false), IsMVEIntMAC(false), 55*21f7c626SNashe Mncube IsNonSubwordLoad(false), IsShift(false), IsRev(false), 56*21f7c626SNashe Mncube ProducesQP(false), ProducesDP(false), ProducesSP(false), 57*21f7c626SNashe Mncube ConsumesQP(false), ConsumesDP(false), ConsumesSP(false), 58*21f7c626SNashe Mncube MVEIntMACMatched(0), AddressOpMask(0) {} 59*21f7c626SNashe Mncube }; 60*21f7c626SNashe Mncube typedef std::array<IInfo, ARM::INSTRUCTION_LIST_END> IInfoArray; 61*21f7c626SNashe Mncube IInfoArray Info; 
62*21f7c626SNashe Mncube 63*21f7c626SNashe Mncube public: 64*21f7c626SNashe Mncube // Always available information 65*21f7c626SNashe Mncube unsigned getAddressOpMask(unsigned Op) { return Info[Op].AddressOpMask; } 66*21f7c626SNashe Mncube bool hasBRegAddr(unsigned Op) { return Info[Op].HasBRegAddr; } 67*21f7c626SNashe Mncube bool hasBRegAddrShift(unsigned Op) { return Info[Op].HasBRegAddrShift; } 68*21f7c626SNashe Mncube bool isDivide(unsigned Op) { return Info[Op].IsDivide; } 69*21f7c626SNashe Mncube bool isInlineShiftALU(unsigned Op) { return Info[Op].IsInlineShiftALU; } 70*21f7c626SNashe Mncube bool isMultiply(unsigned Op) { return Info[Op].IsMultiply; } 71*21f7c626SNashe Mncube bool isMVEIntMAC(unsigned Op) { return Info[Op].IsMVEIntMAC; } 72*21f7c626SNashe Mncube bool isNonSubwordLoad(unsigned Op) { return Info[Op].IsNonSubwordLoad; } 73*21f7c626SNashe Mncube bool isRev(unsigned Op) { return Info[Op].IsRev; } 74*21f7c626SNashe Mncube bool isShift(unsigned Op) { return Info[Op].IsShift; } 75*21f7c626SNashe Mncube 76*21f7c626SNashe Mncube // information available if markDPConsumers is called. 
77*21f7c626SNashe Mncube bool producesQP(unsigned Op) { return Info[Op].ProducesQP; } 78*21f7c626SNashe Mncube bool producesDP(unsigned Op) { return Info[Op].ProducesDP; } 79*21f7c626SNashe Mncube bool producesSP(unsigned Op) { return Info[Op].ProducesSP; } 80*21f7c626SNashe Mncube bool consumesQP(unsigned Op) { return Info[Op].ConsumesQP; } 81*21f7c626SNashe Mncube bool consumesDP(unsigned Op) { return Info[Op].ConsumesDP; } 82*21f7c626SNashe Mncube bool consumesSP(unsigned Op) { return Info[Op].ConsumesSP; } 83*21f7c626SNashe Mncube 84*21f7c626SNashe Mncube bool isMVEIntMACMatched(unsigned SrcOp, unsigned DstOp) { 85*21f7c626SNashe Mncube return SrcOp == DstOp || Info[DstOp].MVEIntMACMatched == SrcOp; 86*21f7c626SNashe Mncube } 87*21f7c626SNashe Mncube 88*21f7c626SNashe Mncube InstructionInformation(const ARMBaseInstrInfo *TII); 89*21f7c626SNashe Mncube 90*21f7c626SNashe Mncube protected: 91*21f7c626SNashe Mncube void markDPProducersConsumers(const ARMBaseInstrInfo *TII); 92*21f7c626SNashe Mncube }; 93*21f7c626SNashe Mncube 94*21f7c626SNashe Mncube InstructionInformation::InstructionInformation(const ARMBaseInstrInfo *TII) { 95*21f7c626SNashe Mncube using namespace ARM; 96*21f7c626SNashe Mncube 97*21f7c626SNashe Mncube std::initializer_list<unsigned> hasBRegAddrList = { 98*21f7c626SNashe Mncube t2LDRs, t2LDRBs, t2LDRHs, t2STRs, t2STRBs, t2STRHs, 99*21f7c626SNashe Mncube tLDRr, tLDRBr, tLDRHr, tSTRr, tSTRBr, tSTRHr, 100*21f7c626SNashe Mncube }; 101*21f7c626SNashe Mncube for (auto op : hasBRegAddrList) { 102*21f7c626SNashe Mncube Info[op].HasBRegAddr = true; 103*21f7c626SNashe Mncube } 104*21f7c626SNashe Mncube 105*21f7c626SNashe Mncube std::initializer_list<unsigned> hasBRegAddrShiftList = { 106*21f7c626SNashe Mncube t2LDRs, t2LDRBs, t2LDRHs, t2STRs, t2STRBs, t2STRHs, 107*21f7c626SNashe Mncube }; 108*21f7c626SNashe Mncube for (auto op : hasBRegAddrShiftList) { 109*21f7c626SNashe Mncube Info[op].HasBRegAddrShift = true; 110*21f7c626SNashe Mncube } 
111*21f7c626SNashe Mncube 112*21f7c626SNashe Mncube Info[t2SDIV].IsDivide = Info[t2UDIV].IsDivide = true; 113*21f7c626SNashe Mncube 114*21f7c626SNashe Mncube std::initializer_list<unsigned> isInlineShiftALUList = { 115*21f7c626SNashe Mncube t2ADCrs, t2ADDSrs, t2ADDrs, t2BICrs, t2EORrs, 116*21f7c626SNashe Mncube t2ORNrs, t2RSBSrs, t2RSBrs, t2SBCrs, t2SUBrs, 117*21f7c626SNashe Mncube t2SUBSrs, t2CMPrs, t2CMNzrs, t2TEQrs, t2TSTrs, 118*21f7c626SNashe Mncube }; 119*21f7c626SNashe Mncube for (auto op : isInlineShiftALUList) { 120*21f7c626SNashe Mncube Info[op].IsInlineShiftALU = true; 121*21f7c626SNashe Mncube } 122*21f7c626SNashe Mncube 123*21f7c626SNashe Mncube Info[t2SDIV].IsDivide = Info[t2UDIV].IsDivide = true; 124*21f7c626SNashe Mncube 125*21f7c626SNashe Mncube std::initializer_list<unsigned> isMultiplyList = { 126*21f7c626SNashe Mncube t2MUL, t2MLA, t2MLS, t2SMLABB, t2SMLABT, t2SMLAD, t2SMLADX, 127*21f7c626SNashe Mncube t2SMLAL, t2SMLALBB, t2SMLALBT, t2SMLALD, t2SMLALDX, t2SMLALTB, t2SMLALTT, 128*21f7c626SNashe Mncube t2SMLATB, t2SMLATT, t2SMLAWT, t2SMLSD, t2SMLSDX, t2SMLSLD, t2SMLSLDX, 129*21f7c626SNashe Mncube t2SMMLA, t2SMMLAR, t2SMMLS, t2SMMLSR, t2SMMUL, t2SMMULR, t2SMUAD, 130*21f7c626SNashe Mncube t2SMUADX, t2SMULBB, t2SMULBT, t2SMULL, t2SMULTB, t2SMULTT, t2SMULWT, 131*21f7c626SNashe Mncube t2SMUSD, t2SMUSDX, t2UMAAL, t2UMLAL, t2UMULL, tMUL, 132*21f7c626SNashe Mncube }; 133*21f7c626SNashe Mncube for (auto op : isMultiplyList) { 134*21f7c626SNashe Mncube Info[op].IsMultiply = true; 135*21f7c626SNashe Mncube } 136*21f7c626SNashe Mncube 137*21f7c626SNashe Mncube std::initializer_list<unsigned> isMVEIntMACList = { 138*21f7c626SNashe Mncube MVE_VMLAS_qr_i16, MVE_VMLAS_qr_i32, MVE_VMLAS_qr_i8, 139*21f7c626SNashe Mncube MVE_VMLA_qr_i16, MVE_VMLA_qr_i32, MVE_VMLA_qr_i8, 140*21f7c626SNashe Mncube MVE_VQDMLAH_qrs16, MVE_VQDMLAH_qrs32, MVE_VQDMLAH_qrs8, 141*21f7c626SNashe Mncube MVE_VQDMLASH_qrs16, MVE_VQDMLASH_qrs32, MVE_VQDMLASH_qrs8, 142*21f7c626SNashe Mncube 
MVE_VQRDMLAH_qrs16, MVE_VQRDMLAH_qrs32, MVE_VQRDMLAH_qrs8, 143*21f7c626SNashe Mncube MVE_VQRDMLASH_qrs16, MVE_VQRDMLASH_qrs32, MVE_VQRDMLASH_qrs8, 144*21f7c626SNashe Mncube MVE_VQDMLADHXs16, MVE_VQDMLADHXs32, MVE_VQDMLADHXs8, 145*21f7c626SNashe Mncube MVE_VQDMLADHs16, MVE_VQDMLADHs32, MVE_VQDMLADHs8, 146*21f7c626SNashe Mncube MVE_VQDMLSDHXs16, MVE_VQDMLSDHXs32, MVE_VQDMLSDHXs8, 147*21f7c626SNashe Mncube MVE_VQDMLSDHs16, MVE_VQDMLSDHs32, MVE_VQDMLSDHs8, 148*21f7c626SNashe Mncube MVE_VQRDMLADHXs16, MVE_VQRDMLADHXs32, MVE_VQRDMLADHXs8, 149*21f7c626SNashe Mncube MVE_VQRDMLADHs16, MVE_VQRDMLADHs32, MVE_VQRDMLADHs8, 150*21f7c626SNashe Mncube MVE_VQRDMLSDHXs16, MVE_VQRDMLSDHXs32, MVE_VQRDMLSDHXs8, 151*21f7c626SNashe Mncube MVE_VQRDMLSDHs16, MVE_VQRDMLSDHs32, MVE_VQRDMLSDHs8, 152*21f7c626SNashe Mncube }; 153*21f7c626SNashe Mncube for (auto op : isMVEIntMACList) { 154*21f7c626SNashe Mncube Info[op].IsMVEIntMAC = true; 155*21f7c626SNashe Mncube } 156*21f7c626SNashe Mncube 157*21f7c626SNashe Mncube std::initializer_list<unsigned> isNonSubwordLoadList = { 158*21f7c626SNashe Mncube t2LDRi12, t2LDRi8, t2LDR_POST, t2LDR_PRE, t2LDRpci, 159*21f7c626SNashe Mncube t2LDRs, t2LDRDi8, t2LDRD_POST, t2LDRD_PRE, tLDRi, 160*21f7c626SNashe Mncube tLDRpci, tLDRr, tLDRspi, 161*21f7c626SNashe Mncube }; 162*21f7c626SNashe Mncube for (auto op : isNonSubwordLoadList) { 163*21f7c626SNashe Mncube Info[op].IsNonSubwordLoad = true; 164*21f7c626SNashe Mncube } 165*21f7c626SNashe Mncube 166*21f7c626SNashe Mncube std::initializer_list<unsigned> isRevList = { 167*21f7c626SNashe Mncube t2REV, t2REV16, t2REVSH, t2RBIT, tREV, tREV16, tREVSH, 168*21f7c626SNashe Mncube }; 169*21f7c626SNashe Mncube for (auto op : isRevList) { 170*21f7c626SNashe Mncube Info[op].IsRev = true; 171*21f7c626SNashe Mncube } 172*21f7c626SNashe Mncube 173*21f7c626SNashe Mncube std::initializer_list<unsigned> isShiftList = { 174*21f7c626SNashe Mncube t2ASRri, t2ASRrr, t2LSLri, t2LSLrr, t2LSRri, t2LSRrr, t2RORri, t2RORrr, 
175*21f7c626SNashe Mncube tASRri, tASRrr, tLSLSri, tLSLri, tLSLrr, tLSRri, tLSRrr, tROR, 176*21f7c626SNashe Mncube }; 177*21f7c626SNashe Mncube for (auto op : isShiftList) { 178*21f7c626SNashe Mncube Info[op].IsShift = true; 179*21f7c626SNashe Mncube } 180*21f7c626SNashe Mncube 181*21f7c626SNashe Mncube std::initializer_list<unsigned> Address1List = { 182*21f7c626SNashe Mncube t2LDRBi12, 183*21f7c626SNashe Mncube t2LDRBi8, 184*21f7c626SNashe Mncube t2LDRBpci, 185*21f7c626SNashe Mncube t2LDRBs, 186*21f7c626SNashe Mncube t2LDRHi12, 187*21f7c626SNashe Mncube t2LDRHi8, 188*21f7c626SNashe Mncube t2LDRHpci, 189*21f7c626SNashe Mncube t2LDRHs, 190*21f7c626SNashe Mncube t2LDRSBi12, 191*21f7c626SNashe Mncube t2LDRSBi8, 192*21f7c626SNashe Mncube t2LDRSBpci, 193*21f7c626SNashe Mncube t2LDRSBs, 194*21f7c626SNashe Mncube t2LDRSHi12, 195*21f7c626SNashe Mncube t2LDRSHi8, 196*21f7c626SNashe Mncube t2LDRSHpci, 197*21f7c626SNashe Mncube t2LDRSHs, 198*21f7c626SNashe Mncube t2LDRi12, 199*21f7c626SNashe Mncube t2LDRi8, 200*21f7c626SNashe Mncube t2LDRpci, 201*21f7c626SNashe Mncube t2LDRs, 202*21f7c626SNashe Mncube tLDRBi, 203*21f7c626SNashe Mncube tLDRBr, 204*21f7c626SNashe Mncube tLDRHi, 205*21f7c626SNashe Mncube tLDRHr, 206*21f7c626SNashe Mncube tLDRSB, 207*21f7c626SNashe Mncube tLDRSH, 208*21f7c626SNashe Mncube tLDRi, 209*21f7c626SNashe Mncube tLDRpci, 210*21f7c626SNashe Mncube tLDRr, 211*21f7c626SNashe Mncube tLDRspi, 212*21f7c626SNashe Mncube t2STRBi12, 213*21f7c626SNashe Mncube t2STRBi8, 214*21f7c626SNashe Mncube t2STRBs, 215*21f7c626SNashe Mncube t2STRHi12, 216*21f7c626SNashe Mncube t2STRHi8, 217*21f7c626SNashe Mncube t2STRHs, 218*21f7c626SNashe Mncube t2STRi12, 219*21f7c626SNashe Mncube t2STRi8, 220*21f7c626SNashe Mncube t2STRs, 221*21f7c626SNashe Mncube tSTRBi, 222*21f7c626SNashe Mncube tSTRBr, 223*21f7c626SNashe Mncube tSTRHi, 224*21f7c626SNashe Mncube tSTRHr, 225*21f7c626SNashe Mncube tSTRi, 226*21f7c626SNashe Mncube tSTRr, 227*21f7c626SNashe Mncube tSTRspi, 228*21f7c626SNashe 
Mncube VLDRD, 229*21f7c626SNashe Mncube VLDRH, 230*21f7c626SNashe Mncube VLDRS, 231*21f7c626SNashe Mncube VSTRD, 232*21f7c626SNashe Mncube VSTRH, 233*21f7c626SNashe Mncube VSTRS, 234*21f7c626SNashe Mncube MVE_VLD20_16, 235*21f7c626SNashe Mncube MVE_VLD20_32, 236*21f7c626SNashe Mncube MVE_VLD20_8, 237*21f7c626SNashe Mncube MVE_VLD21_16, 238*21f7c626SNashe Mncube MVE_VLD21_32, 239*21f7c626SNashe Mncube MVE_VLD21_8, 240*21f7c626SNashe Mncube MVE_VLD40_16, 241*21f7c626SNashe Mncube MVE_VLD40_32, 242*21f7c626SNashe Mncube MVE_VLD40_8, 243*21f7c626SNashe Mncube MVE_VLD41_16, 244*21f7c626SNashe Mncube MVE_VLD41_32, 245*21f7c626SNashe Mncube MVE_VLD41_8, 246*21f7c626SNashe Mncube MVE_VLD42_16, 247*21f7c626SNashe Mncube MVE_VLD42_32, 248*21f7c626SNashe Mncube MVE_VLD42_8, 249*21f7c626SNashe Mncube MVE_VLD43_16, 250*21f7c626SNashe Mncube MVE_VLD43_32, 251*21f7c626SNashe Mncube MVE_VLD43_8, 252*21f7c626SNashe Mncube MVE_VLDRBS16, 253*21f7c626SNashe Mncube MVE_VLDRBS16_rq, 254*21f7c626SNashe Mncube MVE_VLDRBS32, 255*21f7c626SNashe Mncube MVE_VLDRBS32_rq, 256*21f7c626SNashe Mncube MVE_VLDRBU16, 257*21f7c626SNashe Mncube MVE_VLDRBU16_rq, 258*21f7c626SNashe Mncube MVE_VLDRBU32, 259*21f7c626SNashe Mncube MVE_VLDRBU32_rq, 260*21f7c626SNashe Mncube MVE_VLDRBU8, 261*21f7c626SNashe Mncube MVE_VLDRBU8_rq, 262*21f7c626SNashe Mncube MVE_VLDRDU64_qi, 263*21f7c626SNashe Mncube MVE_VLDRDU64_rq, 264*21f7c626SNashe Mncube MVE_VLDRDU64_rq_u, 265*21f7c626SNashe Mncube MVE_VLDRHS32, 266*21f7c626SNashe Mncube MVE_VLDRHS32_rq, 267*21f7c626SNashe Mncube MVE_VLDRHS32_rq_u, 268*21f7c626SNashe Mncube MVE_VLDRHU16, 269*21f7c626SNashe Mncube MVE_VLDRHU16_rq, 270*21f7c626SNashe Mncube MVE_VLDRHU16_rq_u, 271*21f7c626SNashe Mncube MVE_VLDRHU32, 272*21f7c626SNashe Mncube MVE_VLDRHU32_rq, 273*21f7c626SNashe Mncube MVE_VLDRHU32_rq_u, 274*21f7c626SNashe Mncube MVE_VLDRWU32, 275*21f7c626SNashe Mncube MVE_VLDRWU32_qi, 276*21f7c626SNashe Mncube MVE_VLDRWU32_rq, 277*21f7c626SNashe Mncube MVE_VLDRWU32_rq_u, 
278*21f7c626SNashe Mncube MVE_VST20_16, 279*21f7c626SNashe Mncube MVE_VST20_32, 280*21f7c626SNashe Mncube MVE_VST20_8, 281*21f7c626SNashe Mncube MVE_VST21_16, 282*21f7c626SNashe Mncube MVE_VST21_32, 283*21f7c626SNashe Mncube MVE_VST21_8, 284*21f7c626SNashe Mncube MVE_VST40_16, 285*21f7c626SNashe Mncube MVE_VST40_32, 286*21f7c626SNashe Mncube MVE_VST40_8, 287*21f7c626SNashe Mncube MVE_VST41_16, 288*21f7c626SNashe Mncube MVE_VST41_32, 289*21f7c626SNashe Mncube MVE_VST41_8, 290*21f7c626SNashe Mncube MVE_VST42_16, 291*21f7c626SNashe Mncube MVE_VST42_32, 292*21f7c626SNashe Mncube MVE_VST42_8, 293*21f7c626SNashe Mncube MVE_VST43_16, 294*21f7c626SNashe Mncube MVE_VST43_32, 295*21f7c626SNashe Mncube MVE_VST43_8, 296*21f7c626SNashe Mncube MVE_VSTRB16, 297*21f7c626SNashe Mncube MVE_VSTRB16_rq, 298*21f7c626SNashe Mncube MVE_VSTRB32, 299*21f7c626SNashe Mncube MVE_VSTRB32_rq, 300*21f7c626SNashe Mncube MVE_VSTRBU8, 301*21f7c626SNashe Mncube MVE_VSTRB8_rq, 302*21f7c626SNashe Mncube MVE_VSTRD64_qi, 303*21f7c626SNashe Mncube MVE_VSTRD64_rq, 304*21f7c626SNashe Mncube MVE_VSTRD64_rq_u, 305*21f7c626SNashe Mncube MVE_VSTRH32, 306*21f7c626SNashe Mncube MVE_VSTRH32_rq, 307*21f7c626SNashe Mncube MVE_VSTRH32_rq_u, 308*21f7c626SNashe Mncube MVE_VSTRHU16, 309*21f7c626SNashe Mncube MVE_VSTRH16_rq, 310*21f7c626SNashe Mncube MVE_VSTRH16_rq_u, 311*21f7c626SNashe Mncube MVE_VSTRWU32, 312*21f7c626SNashe Mncube MVE_VSTRW32_qi, 313*21f7c626SNashe Mncube MVE_VSTRW32_rq, 314*21f7c626SNashe Mncube MVE_VSTRW32_rq_u, 315*21f7c626SNashe Mncube }; 316*21f7c626SNashe Mncube std::initializer_list<unsigned> Address2List = { 317*21f7c626SNashe Mncube t2LDRB_POST, 318*21f7c626SNashe Mncube t2LDRB_PRE, 319*21f7c626SNashe Mncube t2LDRDi8, 320*21f7c626SNashe Mncube t2LDRH_POST, 321*21f7c626SNashe Mncube t2LDRH_PRE, 322*21f7c626SNashe Mncube t2LDRSB_POST, 323*21f7c626SNashe Mncube t2LDRSB_PRE, 324*21f7c626SNashe Mncube t2LDRSH_POST, 325*21f7c626SNashe Mncube t2LDRSH_PRE, 326*21f7c626SNashe Mncube t2LDR_POST, 
327*21f7c626SNashe Mncube t2LDR_PRE, 328*21f7c626SNashe Mncube t2STRB_POST, 329*21f7c626SNashe Mncube t2STRB_PRE, 330*21f7c626SNashe Mncube t2STRDi8, 331*21f7c626SNashe Mncube t2STRH_POST, 332*21f7c626SNashe Mncube t2STRH_PRE, 333*21f7c626SNashe Mncube t2STR_POST, 334*21f7c626SNashe Mncube t2STR_PRE, 335*21f7c626SNashe Mncube MVE_VLD20_16_wb, 336*21f7c626SNashe Mncube MVE_VLD20_32_wb, 337*21f7c626SNashe Mncube MVE_VLD20_8_wb, 338*21f7c626SNashe Mncube MVE_VLD21_16_wb, 339*21f7c626SNashe Mncube MVE_VLD21_32_wb, 340*21f7c626SNashe Mncube MVE_VLD21_8_wb, 341*21f7c626SNashe Mncube MVE_VLD40_16_wb, 342*21f7c626SNashe Mncube MVE_VLD40_32_wb, 343*21f7c626SNashe Mncube MVE_VLD40_8_wb, 344*21f7c626SNashe Mncube MVE_VLD41_16_wb, 345*21f7c626SNashe Mncube MVE_VLD41_32_wb, 346*21f7c626SNashe Mncube MVE_VLD41_8_wb, 347*21f7c626SNashe Mncube MVE_VLD42_16_wb, 348*21f7c626SNashe Mncube MVE_VLD42_32_wb, 349*21f7c626SNashe Mncube MVE_VLD42_8_wb, 350*21f7c626SNashe Mncube MVE_VLD43_16_wb, 351*21f7c626SNashe Mncube MVE_VLD43_32_wb, 352*21f7c626SNashe Mncube MVE_VLD43_8_wb, 353*21f7c626SNashe Mncube MVE_VLDRBS16_post, 354*21f7c626SNashe Mncube MVE_VLDRBS16_pre, 355*21f7c626SNashe Mncube MVE_VLDRBS32_post, 356*21f7c626SNashe Mncube MVE_VLDRBS32_pre, 357*21f7c626SNashe Mncube MVE_VLDRBU16_post, 358*21f7c626SNashe Mncube MVE_VLDRBU16_pre, 359*21f7c626SNashe Mncube MVE_VLDRBU32_post, 360*21f7c626SNashe Mncube MVE_VLDRBU32_pre, 361*21f7c626SNashe Mncube MVE_VLDRBU8_post, 362*21f7c626SNashe Mncube MVE_VLDRBU8_pre, 363*21f7c626SNashe Mncube MVE_VLDRDU64_qi_pre, 364*21f7c626SNashe Mncube MVE_VLDRHS32_post, 365*21f7c626SNashe Mncube MVE_VLDRHS32_pre, 366*21f7c626SNashe Mncube MVE_VLDRHU16_post, 367*21f7c626SNashe Mncube MVE_VLDRHU16_pre, 368*21f7c626SNashe Mncube MVE_VLDRHU32_post, 369*21f7c626SNashe Mncube MVE_VLDRHU32_pre, 370*21f7c626SNashe Mncube MVE_VLDRWU32_post, 371*21f7c626SNashe Mncube MVE_VLDRWU32_pre, 372*21f7c626SNashe Mncube MVE_VLDRWU32_qi_pre, 373*21f7c626SNashe Mncube 
MVE_VST20_16_wb, 374*21f7c626SNashe Mncube MVE_VST20_32_wb, 375*21f7c626SNashe Mncube MVE_VST20_8_wb, 376*21f7c626SNashe Mncube MVE_VST21_16_wb, 377*21f7c626SNashe Mncube MVE_VST21_32_wb, 378*21f7c626SNashe Mncube MVE_VST21_8_wb, 379*21f7c626SNashe Mncube MVE_VST40_16_wb, 380*21f7c626SNashe Mncube MVE_VST40_32_wb, 381*21f7c626SNashe Mncube MVE_VST40_8_wb, 382*21f7c626SNashe Mncube MVE_VST41_16_wb, 383*21f7c626SNashe Mncube MVE_VST41_32_wb, 384*21f7c626SNashe Mncube MVE_VST41_8_wb, 385*21f7c626SNashe Mncube MVE_VST42_16_wb, 386*21f7c626SNashe Mncube MVE_VST42_32_wb, 387*21f7c626SNashe Mncube MVE_VST42_8_wb, 388*21f7c626SNashe Mncube MVE_VST43_16_wb, 389*21f7c626SNashe Mncube MVE_VST43_32_wb, 390*21f7c626SNashe Mncube MVE_VST43_8_wb, 391*21f7c626SNashe Mncube MVE_VSTRB16_post, 392*21f7c626SNashe Mncube MVE_VSTRB16_pre, 393*21f7c626SNashe Mncube MVE_VSTRB32_post, 394*21f7c626SNashe Mncube MVE_VSTRB32_pre, 395*21f7c626SNashe Mncube MVE_VSTRBU8_post, 396*21f7c626SNashe Mncube MVE_VSTRBU8_pre, 397*21f7c626SNashe Mncube MVE_VSTRD64_qi_pre, 398*21f7c626SNashe Mncube MVE_VSTRH32_post, 399*21f7c626SNashe Mncube MVE_VSTRH32_pre, 400*21f7c626SNashe Mncube MVE_VSTRHU16_post, 401*21f7c626SNashe Mncube MVE_VSTRHU16_pre, 402*21f7c626SNashe Mncube MVE_VSTRWU32_post, 403*21f7c626SNashe Mncube MVE_VSTRWU32_pre, 404*21f7c626SNashe Mncube MVE_VSTRW32_qi_pre, 405*21f7c626SNashe Mncube }; 406*21f7c626SNashe Mncube std::initializer_list<unsigned> Address3List = { 407*21f7c626SNashe Mncube t2LDRD_POST, 408*21f7c626SNashe Mncube t2LDRD_PRE, 409*21f7c626SNashe Mncube t2STRD_POST, 410*21f7c626SNashe Mncube t2STRD_PRE, 411*21f7c626SNashe Mncube }; 412*21f7c626SNashe Mncube // Compute a mask of which operands are involved in address computation 413*21f7c626SNashe Mncube for (auto &op : Address1List) { 414*21f7c626SNashe Mncube Info[op].AddressOpMask = 0x6; 415*21f7c626SNashe Mncube } 416*21f7c626SNashe Mncube for (auto &op : Address2List) { 417*21f7c626SNashe Mncube Info[op].AddressOpMask = 
0xc; 418*21f7c626SNashe Mncube } 419*21f7c626SNashe Mncube for (auto &op : Address3List) { 420*21f7c626SNashe Mncube Info[op].AddressOpMask = 0x18; 421*21f7c626SNashe Mncube } 422*21f7c626SNashe Mncube for (auto &op : hasBRegAddrShiftList) { 423*21f7c626SNashe Mncube Info[op].AddressOpMask |= 0x8; 424*21f7c626SNashe Mncube } 425*21f7c626SNashe Mncube } 426*21f7c626SNashe Mncube 427*21f7c626SNashe Mncube void InstructionInformation::markDPProducersConsumers( 428*21f7c626SNashe Mncube const ARMBaseInstrInfo *TII) { 429*21f7c626SNashe Mncube // Learn about all instructions which have FP source/dest registers 430*21f7c626SNashe Mncube for (unsigned MI = 0; MI < ARM::INSTRUCTION_LIST_END; ++MI) { 431*21f7c626SNashe Mncube const MCInstrDesc &MID = TII->get(MI); 432*21f7c626SNashe Mncube auto Operands = MID.operands(); 433*21f7c626SNashe Mncube for (unsigned OI = 0, OIE = MID.getNumOperands(); OI != OIE; ++OI) { 434*21f7c626SNashe Mncube bool MarkQP = false, MarkDP = false, MarkSP = false; 435*21f7c626SNashe Mncube switch (Operands[OI].RegClass) { 436*21f7c626SNashe Mncube case ARM::MQPRRegClassID: 437*21f7c626SNashe Mncube case ARM::DPRRegClassID: 438*21f7c626SNashe Mncube case ARM::DPR_8RegClassID: 439*21f7c626SNashe Mncube case ARM::DPR_VFP2RegClassID: 440*21f7c626SNashe Mncube case ARM::DPairRegClassID: 441*21f7c626SNashe Mncube case ARM::DPairSpcRegClassID: 442*21f7c626SNashe Mncube case ARM::DQuadRegClassID: 443*21f7c626SNashe Mncube case ARM::DQuadSpcRegClassID: 444*21f7c626SNashe Mncube case ARM::DTripleRegClassID: 445*21f7c626SNashe Mncube case ARM::DTripleSpcRegClassID: 446*21f7c626SNashe Mncube MarkDP = true; 447*21f7c626SNashe Mncube break; 448*21f7c626SNashe Mncube case ARM::QPRRegClassID: 449*21f7c626SNashe Mncube case ARM::QPR_8RegClassID: 450*21f7c626SNashe Mncube case ARM::QPR_VFP2RegClassID: 451*21f7c626SNashe Mncube case ARM::QQPRRegClassID: 452*21f7c626SNashe Mncube case ARM::QQQQPRRegClassID: 453*21f7c626SNashe Mncube MarkQP = true; 454*21f7c626SNashe 
Mncube break; 455*21f7c626SNashe Mncube case ARM::SPRRegClassID: 456*21f7c626SNashe Mncube case ARM::SPR_8RegClassID: 457*21f7c626SNashe Mncube case ARM::FPWithVPRRegClassID: 458*21f7c626SNashe Mncube MarkSP = true; 459*21f7c626SNashe Mncube break; 460*21f7c626SNashe Mncube default: 461*21f7c626SNashe Mncube break; 462*21f7c626SNashe Mncube } 463*21f7c626SNashe Mncube if (MarkQP) { 464*21f7c626SNashe Mncube if (OI < MID.getNumDefs()) 465*21f7c626SNashe Mncube Info[MI].ProducesQP = true; 466*21f7c626SNashe Mncube else 467*21f7c626SNashe Mncube Info[MI].ConsumesQP = true; 468*21f7c626SNashe Mncube } 469*21f7c626SNashe Mncube if (MarkDP) { 470*21f7c626SNashe Mncube if (OI < MID.getNumDefs()) 471*21f7c626SNashe Mncube Info[MI].ProducesDP = true; 472*21f7c626SNashe Mncube else 473*21f7c626SNashe Mncube Info[MI].ConsumesDP = true; 474*21f7c626SNashe Mncube } 475*21f7c626SNashe Mncube if (MarkSP) { 476*21f7c626SNashe Mncube if (OI < MID.getNumDefs()) 477*21f7c626SNashe Mncube Info[MI].ProducesSP = true; 478*21f7c626SNashe Mncube else 479*21f7c626SNashe Mncube Info[MI].ConsumesSP = true; 480*21f7c626SNashe Mncube } 481*21f7c626SNashe Mncube } 482*21f7c626SNashe Mncube } 483*21f7c626SNashe Mncube } 484*21f7c626SNashe Mncube 485*21f7c626SNashe Mncube } // anonymous namespace 486*21f7c626SNashe Mncube 487*21f7c626SNashe Mncube static bool hasImplicitCPSRUse(const MachineInstr *MI) { 488*21f7c626SNashe Mncube return MI->getDesc().hasImplicitUseOfPhysReg(ARM::CPSR); 489*21f7c626SNashe Mncube } 490*21f7c626SNashe Mncube 491*21f7c626SNashe Mncube void ARMOverrideBypasses::setBidirLatencies(SUnit &SrcSU, SDep &SrcDep, 492*21f7c626SNashe Mncube unsigned latency) { 493*21f7c626SNashe Mncube SDep Reverse = SrcDep; 494*21f7c626SNashe Mncube Reverse.setSUnit(&SrcSU); 495*21f7c626SNashe Mncube for (SDep &PDep : SrcDep.getSUnit()->Preds) { 496*21f7c626SNashe Mncube if (PDep == Reverse) { 497*21f7c626SNashe Mncube PDep.setLatency(latency); 498*21f7c626SNashe Mncube 
SrcDep.getSUnit()->setDepthDirty(); 499*21f7c626SNashe Mncube break; 500*21f7c626SNashe Mncube } 501*21f7c626SNashe Mncube } 502*21f7c626SNashe Mncube SrcDep.setLatency(latency); 503*21f7c626SNashe Mncube SrcSU.setHeightDirty(); 504*21f7c626SNashe Mncube } 505*21f7c626SNashe Mncube 506*21f7c626SNashe Mncube static bool mismatchedPred(ARMCC::CondCodes a, ARMCC::CondCodes b) { 507*21f7c626SNashe Mncube return (a & 0xe) != (b & 0xe); 508*21f7c626SNashe Mncube } 509*21f7c626SNashe Mncube 510*21f7c626SNashe Mncube // Set output dependences to zero latency for processors which can 511*21f7c626SNashe Mncube // simultaneously issue to the same register. Returns true if a change 512*21f7c626SNashe Mncube // was made. 513*21f7c626SNashe Mncube bool ARMOverrideBypasses::zeroOutputDependences(SUnit &ISU, SDep &Dep) { 514*21f7c626SNashe Mncube if (Dep.getKind() == SDep::Output) { 515*21f7c626SNashe Mncube setBidirLatencies(ISU, Dep, 0); 516*21f7c626SNashe Mncube return true; 517*21f7c626SNashe Mncube } 518*21f7c626SNashe Mncube return false; 519*21f7c626SNashe Mncube } 520*21f7c626SNashe Mncube 521*21f7c626SNashe Mncube // The graph doesn't look inside of bundles to determine their 522*21f7c626SNashe Mncube // scheduling boundaries and reports zero latency into and out of them 523*21f7c626SNashe Mncube // (except for CPSR into the bundle, which has latency 1). 524*21f7c626SNashe Mncube // Make some better scheduling assumptions: 525*21f7c626SNashe Mncube // 1) CPSR uses have zero latency; other uses have incoming latency 1 526*21f7c626SNashe Mncube // 2) CPSR defs retain a latency of zero; others have a latency of 1. 
527*21f7c626SNashe Mncube // 528*21f7c626SNashe Mncube // Returns 1 if a use change was made; 2 if a def change was made; 0 otherwise 529*21f7c626SNashe Mncube unsigned ARMOverrideBypasses::makeBundleAssumptions(SUnit &ISU, SDep &Dep) { 530*21f7c626SNashe Mncube 531*21f7c626SNashe Mncube SUnit &DepSU = *Dep.getSUnit(); 532*21f7c626SNashe Mncube const MachineInstr *SrcMI = ISU.getInstr(); 533*21f7c626SNashe Mncube unsigned SrcOpcode = SrcMI->getOpcode(); 534*21f7c626SNashe Mncube const MachineInstr *DstMI = DepSU.getInstr(); 535*21f7c626SNashe Mncube unsigned DstOpcode = DstMI->getOpcode(); 536*21f7c626SNashe Mncube 537*21f7c626SNashe Mncube if (DstOpcode == ARM::BUNDLE && TII->isPredicated(*DstMI)) { 538*21f7c626SNashe Mncube setBidirLatencies( 539*21f7c626SNashe Mncube ISU, Dep, 540*21f7c626SNashe Mncube (Dep.isAssignedRegDep() && Dep.getReg() == ARM::CPSR) ? 0 : 1); 541*21f7c626SNashe Mncube return 1; 542*21f7c626SNashe Mncube } 543*21f7c626SNashe Mncube if (SrcOpcode == ARM::BUNDLE && TII->isPredicated(*SrcMI) && 544*21f7c626SNashe Mncube Dep.isAssignedRegDep() && Dep.getReg() != ARM::CPSR) { 545*21f7c626SNashe Mncube setBidirLatencies(ISU, Dep, 1); 546*21f7c626SNashe Mncube return 2; 547*21f7c626SNashe Mncube } 548*21f7c626SNashe Mncube return 0; 549*21f7c626SNashe Mncube } 550*21f7c626SNashe Mncube 551*21f7c626SNashe Mncube // Determine whether there is a memory RAW hazard here and set up latency 552*21f7c626SNashe Mncube // accordingly 553*21f7c626SNashe Mncube bool ARMOverrideBypasses::memoryRAWHazard(SUnit &ISU, SDep &Dep, 554*21f7c626SNashe Mncube unsigned latency) { 555*21f7c626SNashe Mncube if (!Dep.isNormalMemory()) 556*21f7c626SNashe Mncube return false; 557*21f7c626SNashe Mncube auto &SrcInst = *ISU.getInstr(); 558*21f7c626SNashe Mncube auto &DstInst = *Dep.getSUnit()->getInstr(); 559*21f7c626SNashe Mncube if (!SrcInst.mayStore() || !DstInst.mayLoad()) 560*21f7c626SNashe Mncube return false; 561*21f7c626SNashe Mncube 562*21f7c626SNashe Mncube auto 
SrcMO = *SrcInst.memoperands().begin(); 563*21f7c626SNashe Mncube auto DstMO = *DstInst.memoperands().begin(); 564*21f7c626SNashe Mncube auto SrcVal = SrcMO->getValue(); 565*21f7c626SNashe Mncube auto DstVal = DstMO->getValue(); 566*21f7c626SNashe Mncube auto SrcPseudoVal = SrcMO->getPseudoValue(); 567*21f7c626SNashe Mncube auto DstPseudoVal = DstMO->getPseudoValue(); 568*21f7c626SNashe Mncube if (SrcVal && DstVal && AA->alias(SrcVal, DstVal) == AliasResult::MustAlias && 569*21f7c626SNashe Mncube SrcMO->getOffset() == DstMO->getOffset()) { 570*21f7c626SNashe Mncube setBidirLatencies(ISU, Dep, latency); 571*21f7c626SNashe Mncube return true; 572*21f7c626SNashe Mncube } else if (SrcPseudoVal && DstPseudoVal && 573*21f7c626SNashe Mncube SrcPseudoVal->kind() == DstPseudoVal->kind() && 574*21f7c626SNashe Mncube SrcPseudoVal->kind() == PseudoSourceValue::FixedStack) { 575*21f7c626SNashe Mncube // Spills/fills 576*21f7c626SNashe Mncube auto FS0 = cast<FixedStackPseudoSourceValue>(SrcPseudoVal); 577*21f7c626SNashe Mncube auto FS1 = cast<FixedStackPseudoSourceValue>(DstPseudoVal); 578*21f7c626SNashe Mncube if (FS0 == FS1) { 579*21f7c626SNashe Mncube setBidirLatencies(ISU, Dep, latency); 580*21f7c626SNashe Mncube return true; 581*21f7c626SNashe Mncube } 582*21f7c626SNashe Mncube } 583*21f7c626SNashe Mncube return false; 584*21f7c626SNashe Mncube } 585*21f7c626SNashe Mncube 586*21f7c626SNashe Mncube namespace { 587*21f7c626SNashe Mncube 588*21f7c626SNashe Mncube std::unique_ptr<InstructionInformation> II; 589*21f7c626SNashe Mncube 590*21f7c626SNashe Mncube class CortexM7InstructionInformation : public InstructionInformation { 591*21f7c626SNashe Mncube public: 592*21f7c626SNashe Mncube CortexM7InstructionInformation(const ARMBaseInstrInfo *TII) 593*21f7c626SNashe Mncube : InstructionInformation(TII) {} 594*21f7c626SNashe Mncube }; 595*21f7c626SNashe Mncube 596*21f7c626SNashe Mncube class CortexM7Overrides : public ARMOverrideBypasses { 597*21f7c626SNashe Mncube public: 
  // Lazily build the shared per-opcode information table on first use.
  CortexM7Overrides(const ARMBaseInstrInfo *TII, AAResults *AA)
      : ARMOverrideBypasses(TII, AA) {
    if (!II)
      II.reset(new CortexM7InstructionInformation(TII));
  }

  void modifyBypasses(SUnit &) override;
};

// Apply Cortex-M7-specific inter-instruction latency overrides to every
// successor (data) edge of ISU.  Each rule below adjusts the latency in both
// directions (producer->consumer and consumer->producer) via
// setBidirLatencies.
void CortexM7Overrides::modifyBypasses(SUnit &ISU) {
  const MachineInstr *SrcMI = ISU.getInstr();
  unsigned SrcOpcode = SrcMI->getOpcode();
  // "NSW" = non-subword: a load of a word or larger (see IInfo).
  bool isNSWload = II->isNonSubwordLoad(SrcOpcode);

  // Walk the successors looking for latency overrides that are needed
  for (SDep &Dep : ISU.Succs) {

    // Output dependences should have 0 latency, as M7 is able to
    // schedule writers to the same register for simultaneous issue.
    if (zeroOutputDependences(ISU, Dep))
      continue;

    if (memoryRAWHazard(ISU, Dep, 4))
      continue;

    // Ignore dependencies other than data
    if (Dep.getKind() != SDep::Data)
      continue;

    SUnit &DepSU = *Dep.getSUnit();
    if (DepSU.isBoundaryNode())
      continue;

    if (makeBundleAssumptions(ISU, Dep) == 1)
      continue;

    const MachineInstr *DstMI = DepSU.getInstr();
    unsigned DstOpcode = DstMI->getOpcode();

    // Word loads into any multiply or divide instruction are considered
    // cannot bypass their scheduling stage. Didn't do this in the .td file
    // because we cannot easily create a read advance that is 0 from certain
    // writer classes and 1 from all the rest.
    // (The other way around would have been easy.)
    if (isNSWload && (II->isMultiply(DstOpcode) || II->isDivide(DstOpcode)))
      setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);

    // Word loads into B operand of a load/store are considered cannot bypass
    // their scheduling stage. Cannot do in the .td file because
    // need to decide between -1 and -2 for ReadAdvance
    // (Operand 2 is the B-side address register when hasBRegAddr is set.)
    if (isNSWload && II->hasBRegAddr(DstOpcode) &&
        DstMI->getOperand(2).getReg() == Dep.getReg())
      setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);

    // Multiplies into any address generation cannot bypass from EX3. Cannot do
    // in the .td file because need to decide between -1 and -2 for ReadAdvance
    if (II->isMultiply(SrcOpcode)) {
      // AddressOpMask has one bit per operand index that feeds the AGU;
      // shift it down as we advance through the operand list.
      unsigned OpMask = II->getAddressOpMask(DstOpcode) >> 1;
      for (unsigned i = 1; OpMask; ++i, OpMask >>= 1) {
        if ((OpMask & 1) && DstMI->getOperand(i).isReg() &&
            DstMI->getOperand(i).getReg() == Dep.getReg()) {
          setBidirLatencies(ISU, Dep, 4); // first legal bypass is EX4->EX1
          break;
        }
      }
    }

    // Mismatched conditional producers take longer on M7; they end up looking
    // like they were produced at EX3 and read at IS.
    if (TII->isPredicated(*SrcMI) && Dep.isAssignedRegDep() &&
        (SrcOpcode == ARM::BUNDLE ||
         mismatchedPred(TII->getPredicate(*SrcMI),
                        TII->getPredicate(*DstMI)))) {
      unsigned Lat = 1;
      // Operand A of shift+ALU is treated as an EX1 read instead of EX2.
      if (II->isInlineShiftALU(DstOpcode) && DstMI->getOperand(3).getImm() &&
          DstMI->getOperand(1).getReg() == Dep.getReg())
        Lat = 2;
      // Cap the penalty at 3 cycles, and never shorten an existing latency.
      Lat = std::min(3u, Dep.getLatency() + Lat);
      setBidirLatencies(ISU, Dep, std::max(Dep.getLatency(), Lat));
    }

    // CC setter into conditional producer shouldn't have a latency of more
    // than 1 unless it's due to an implicit read. (All the "true" readers
    // of the condition code use an implicit read, and predicates use an
    // explicit.)
    if (Dep.isAssignedRegDep() && Dep.getReg() == ARM::CPSR &&
        TII->isPredicated(*DstMI) && !hasImplicitCPSRUse(DstMI))
      setBidirLatencies(ISU, Dep, 1);

    // REV instructions cannot bypass directly into the EX1 shifter. The
    // code is slightly inexact as it doesn't attempt to ensure that the bypass
    // is to the shifter operands.
    if (II->isRev(SrcOpcode)) {
      if (II->isInlineShiftALU(DstOpcode))
        setBidirLatencies(ISU, Dep, 2);
      else if (II->isShift(DstOpcode))
        setBidirLatencies(ISU, Dep, 1);
    }
  }
}

// Opcode information for Cortex-M85: the generic tables plus the
// DP producer/consumer marking needed by the mixed-width FP rules.
class M85InstructionInformation : public InstructionInformation {
public:
  M85InstructionInformation(const ARMBaseInstrInfo *t)
      : InstructionInformation(t) {
    markDPProducersConsumers(t);
  }
};

// Latency-bypass overrides for Cortex-M85.
class M85Overrides : public ARMOverrideBypasses {
public:
  M85Overrides(const ARMBaseInstrInfo *t, AAResults *a)
      : ARMOverrideBypasses(t, a) {
    if (!II)
      II.reset(new M85InstructionInformation(t));
  }

  void modifyBypasses(SUnit &) override;

private:
  // Map a scheduling class's write latency to the pipeline stage producing
  // the result.
  unsigned computeBypassStage(const MCSchedClassDesc *SCD);
  // Signed latency adjustment for mixed-width FP register bypasses
  // (0 when no adjustment applies).
  signed modifyMixedWidthFP(const MachineInstr *SrcMI,
                            const MachineInstr *DstMI, unsigned RegID,
                            const MCSchedClassDesc *SCD);
};

unsigned M85Overrides::computeBypassStage(const MCSchedClassDesc *SCDesc) {
  auto SM = DAG->getSchedModel();
  unsigned DefIdx = 0; // just look for the first output's timing
  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
    // Lookup the definition's write latency in SubtargetInfo.
    const MCWriteLatencyEntry *WLEntry =
        SM->getSubtargetInfo()->getWriteLatencyEntry(SCDesc, DefIdx);
    // Negative cycle counts mark invalid entries; treat them as very large
    // so they fall into the ">3" bucket below.
    unsigned Latency = WLEntry->Cycles >= 0 ? WLEntry->Cycles : 1000;
    // Presumed mapping: 4-cycle results come from stage 2, 5-cycle (and
    // longer, plus the invalid sentinel) from stage 3, shorter latencies map
    // to themselves — TODO confirm against the M85 pipeline description.
    if (Latency == 4)
      return 2;
    else if (Latency == 5)
      return 3;
    else if (Latency > 3) // only reachable for Latency >= 6
      return 3;
    else
      return Latency;
  }
  // No write-latency entry available: assume stage 2.
  return 2;
}

// Latency changes for bypassing between FP registers of different sizes:
//
// Note that mixed DP/SP are unlikely because of the semantics
// of C. Mixed MVE/SP are quite common when MVE intrinsics are used.
// Compute a signed latency adjustment for the edge from SrcMI (producer) to
// DstMI (consumer) across register RegID, for producer/consumer pairs of
// different FP register widths (SP/DP/QP).  The caller adds the returned
// value to the edge's current latency; 0 means no adjustment applies.
signed M85Overrides::modifyMixedWidthFP(const MachineInstr *SrcMI,
                                        const MachineInstr *DstMI,
                                        unsigned RegID,
                                        const MCSchedClassDesc *SCD) {

  // Only FP-register producers are of interest.
  if (!II->producesSP(SrcMI->getOpcode()) &&
      !II->producesDP(SrcMI->getOpcode()) &&
      !II->producesQP(SrcMI->getOpcode())) 
    return 0;

  if (Register::isVirtualRegister(RegID)) {
    // Virtual registers: detect the sub-register lane through getSubReg().
    if (II->producesSP(SrcMI->getOpcode()) &&
        II->consumesDP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() == RegID &&
            OP.getSubReg() == ARM::ssub_1)
          return 5 - computeBypassStage(SCD);
    } else if (II->producesSP(SrcMI->getOpcode()) &&
               II->consumesQP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() == RegID &&
            (OP.getSubReg() == ARM::ssub_1 || OP.getSubReg() == ARM::ssub_3))
          // NOTE(review): the enclosing condition only admits ssub_1/ssub_3,
          // so the ssub_2 test below can never fire — only ssub_3 takes the
          // extra cycle.  Confirm whether ssub_2 was meant to be accepted
          // above as well (compare the physical-register case).
          return 5 - computeBypassStage(SCD) -
                 ((OP.getSubReg() == ARM::ssub_2 ||
                   OP.getSubReg() == ARM::ssub_3)
                      ? 1
                      : 0);
    } else if (II->producesDP(SrcMI->getOpcode()) &&
               II->consumesQP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() == RegID &&
            OP.getSubReg() == ARM::ssub_1)
          return -1;
    } else if (II->producesDP(SrcMI->getOpcode()) &&
               II->consumesSP(DstMI->getOpcode())) {
      // Consumer is narrower: scan the consumer's uses instead of the
      // producer's defs.
      for (auto &OP : DstMI->operands())
        if (OP.isReg() && OP.isUse() && OP.getReg() == RegID &&
            OP.getSubReg() == ARM::ssub_1)
          return 5 - computeBypassStage(SCD);
    } else if (II->producesQP(SrcMI->getOpcode()) &&
               II->consumesSP(DstMI->getOpcode())) {
      for (auto &OP : DstMI->operands())
        if (OP.isReg() && OP.isUse() && OP.getReg() == RegID &&
            (OP.getSubReg() == ARM::ssub_1 || OP.getSubReg() == ARM::ssub_3))
          // NOTE(review): as above, ssub_2 is unreachable here given the
          // enclosing ssub_1/ssub_3 check.
          return 5 - computeBypassStage(SCD) +
                 ((OP.getSubReg() == ARM::ssub_2 ||
                   OP.getSubReg() == ARM::ssub_3)
                      ? 1
                      : 0);
    } else if (II->producesQP(SrcMI->getOpcode()) &&
               II->consumesDP(DstMI->getOpcode())) {
      for (auto &OP : DstMI->operands())
        if (OP.isReg() && OP.isUse() && OP.getReg() == RegID &&
            OP.getSubReg() == ARM::ssub_1)
          return 1;
    }
  } else if (Register::isPhysicalRegister(RegID)) {
    // Note that when the producer is narrower, not all of the producers
    // may be present in the scheduling graph; somewhere earlier in the
    // compiler, an implicit def/use of the aliased full register gets
    // added to the producer, and so only that producer is seen as *the*
    // single producer. This behavior also has the unfortunate effect of
    // serializing the producers in the compiler's view of things.
    // For physical registers the lane is recovered from the register number
    // itself: odd S-registers are the upper half of a D-pair, and the
    // arithmetic below maps S/D registers onto their containing D/Q
    // registers to match RegID.
    if (II->producesSP(SrcMI->getOpcode()) &&
        II->consumesDP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() >= ARM::S1 &&
            OP.getReg() <= ARM::S31 && (OP.getReg() - ARM::S0) % 2 &&
            (OP.getReg() == RegID ||
             (OP.getReg() - ARM::S0) / 2 + ARM::D0 == RegID ||
             (OP.getReg() - ARM::S0) / 4 + ARM::Q0 == RegID))
          return 5 - computeBypassStage(SCD);
    } else if (II->producesSP(SrcMI->getOpcode()) &&
               II->consumesQP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() >= ARM::S1 &&
            OP.getReg() <= ARM::S31 && (OP.getReg() - ARM::S0) % 2 &&
            (OP.getReg() == RegID ||
             (OP.getReg() - ARM::S0) / 2 + ARM::D0 == RegID ||
             (OP.getReg() - ARM::S0) / 4 + ARM::Q0 == RegID))
          return 5 - computeBypassStage(SCD) -
                 (((OP.getReg() - ARM::S0) / 2) % 2 ? 1 : 0);
    } else if (II->producesDP(SrcMI->getOpcode()) &&
               II->consumesQP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() >= ARM::D0 &&
            OP.getReg() <= ARM::D15 && (OP.getReg() - ARM::D0) % 2 &&
            (OP.getReg() == RegID ||
             (OP.getReg() - ARM::D0) / 2 + ARM::Q0 == RegID))
          return -1;
    } else if (II->producesDP(SrcMI->getOpcode()) &&
               II->consumesSP(DstMI->getOpcode())) {
      if (RegID >= ARM::S1 && RegID <= ARM::S31 && (RegID - ARM::S0) % 2)
        return 5 - computeBypassStage(SCD);
    } else if (II->producesQP(SrcMI->getOpcode()) &&
               II->consumesSP(DstMI->getOpcode())) {
      if (RegID >= ARM::S1 && RegID <= ARM::S31 && (RegID - ARM::S0) % 2)
        return 5 - computeBypassStage(SCD) +
               (((RegID - ARM::S0) / 2) % 2 ? 1 : 0);
    } else if (II->producesQP(SrcMI->getOpcode()) &&
               II->consumesDP(DstMI->getOpcode())) {
      if (RegID >= ARM::D1 && RegID <= ARM::D15 && (RegID - ARM::D0) % 2)
        return 1;
    }
  }
  // No mixed-width bypass rule matched.
  return 0;
}

// Apply Cortex-M85-specific latency overrides to every successor (data)
// edge of ISU.
void M85Overrides::modifyBypasses(SUnit &ISU) {
  const MachineInstr *SrcMI = ISU.getInstr();
  unsigned SrcOpcode = SrcMI->getOpcode();
  // "NSW" = non-subword: a load of a word or larger (see IInfo).
  bool isNSWload = II->isNonSubwordLoad(SrcOpcode);

  // Walk the successors looking for latency overrides that are needed
  for (SDep &Dep : ISU.Succs) {

    // Output dependences should have 0 latency, as CortexM85 is able to
    // schedule writers to the same register for simultaneous issue.
    if (zeroOutputDependences(ISU, Dep))
      continue;

    if (memoryRAWHazard(ISU, Dep, 3))
      continue;

    // Ignore dependencies other than data or strong ordering.
    if (Dep.getKind() != SDep::Data)
      continue;

    SUnit &DepSU = *Dep.getSUnit();
    if (DepSU.isBoundaryNode())
      continue;

    if (makeBundleAssumptions(ISU, Dep) == 1)
      continue;

    const MachineInstr *DstMI = DepSU.getInstr();
    unsigned DstOpcode = DstMI->getOpcode();

    // Word loads into B operand of a load/store with cannot bypass their
    // scheduling stage. Cannot do in the .td file because need to decide
    // between -1 and -2 for ReadAdvance

    if (isNSWload && II->hasBRegAddrShift(DstOpcode) &&
        DstMI->getOperand(3).getImm() != 0 && // shift operand
        DstMI->getOperand(2).getReg() == Dep.getReg())
      setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);

    // Word loads feeding any MVE vector instruction take an extra cycle.
    if (isNSWload && isMVEVectorInstruction(DstMI)) {
      setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);
    }

    // A matched MVE integer MAC accumulating into the same register as the
    // producer's result gets one cycle back.
    if (II->isMVEIntMAC(DstOpcode) &&
        II->isMVEIntMACMatched(SrcOpcode, DstOpcode) &&
        DstMI->getOperand(0).isReg() &&
        DstMI->getOperand(0).getReg() == Dep.getReg())
      setBidirLatencies(ISU, Dep, Dep.getLatency() - 1);

    // CC setter into conditional producer shouldn't have a latency of more
    // than 0 unless it's due to an implicit read.
    if (Dep.isAssignedRegDep() && Dep.getReg() == ARM::CPSR &&
        TII->isPredicated(*DstMI) && !hasImplicitCPSRUse(DstMI))
      setBidirLatencies(ISU, Dep, 0);

    // Mixed-width FP bypass adjustment (never drives latency below 0).
    if (signed ALat = modifyMixedWidthFP(SrcMI, DstMI, Dep.getReg(),
                                         DAG->getSchedClass(&ISU)))
      setBidirLatencies(ISU, Dep, std::max(0, signed(Dep.getLatency()) + ALat));

    // REV feeding a shift: both the inline shift+ALU and the pure shift
    // consumer are set to latency 1 here (unlike M7, where they differ).
    if (II->isRev(SrcOpcode)) {
      if (II->isInlineShiftALU(DstOpcode))
        setBidirLatencies(ISU, Dep, 1);
      else if (II->isShift(DstOpcode))
        setBidirLatencies(ISU, Dep, 1);
    }
  }
}

// Add M55 specific overrides for latencies between instructions. Currently it:
// - Adds an extra cycle latency between MVE VMLAV and scalar instructions.
927*21f7c626SNashe Mncube class CortexM55Overrides : public ARMOverrideBypasses { 928*21f7c626SNashe Mncube public: 929*21f7c626SNashe Mncube CortexM55Overrides(const ARMBaseInstrInfo *TII, AAResults *AA) 930*21f7c626SNashe Mncube : ARMOverrideBypasses(TII, AA) {} 931*21f7c626SNashe Mncube 932*21f7c626SNashe Mncube void modifyBypasses(SUnit &SU) override { 933*21f7c626SNashe Mncube MachineInstr *SrcMI = SU.getInstr(); 934*21f7c626SNashe Mncube if (!(SrcMI->getDesc().TSFlags & ARMII::HorizontalReduction)) 935*21f7c626SNashe Mncube return; 936*21f7c626SNashe Mncube 937*21f7c626SNashe Mncube for (SDep &Dep : SU.Succs) { 938*21f7c626SNashe Mncube if (Dep.getKind() != SDep::Data) 939*21f7c626SNashe Mncube continue; 940*21f7c626SNashe Mncube SUnit &DepSU = *Dep.getSUnit(); 941*21f7c626SNashe Mncube if (DepSU.isBoundaryNode()) 942*21f7c626SNashe Mncube continue; 943*21f7c626SNashe Mncube MachineInstr *DstMI = DepSU.getInstr(); 944*21f7c626SNashe Mncube 945*21f7c626SNashe Mncube if (!isMVEVectorInstruction(DstMI) && !DstMI->mayStore()) 946*21f7c626SNashe Mncube setBidirLatencies(SU, Dep, 3); 947*21f7c626SNashe Mncube } 948*21f7c626SNashe Mncube } 949*21f7c626SNashe Mncube }; 950*21f7c626SNashe Mncube 951*21f7c626SNashe Mncube } // end anonymous namespace 952*21f7c626SNashe Mncube 953*21f7c626SNashe Mncube void ARMOverrideBypasses::apply(ScheduleDAGInstrs *DAGInstrs) { 954*21f7c626SNashe Mncube DAG = DAGInstrs; 955*21f7c626SNashe Mncube for (SUnit &ISU : DAGInstrs->SUnits) { 956*21f7c626SNashe Mncube if (ISU.isBoundaryNode()) 957*21f7c626SNashe Mncube continue; 958*21f7c626SNashe Mncube modifyBypasses(ISU); 959*21f7c626SNashe Mncube } 960*21f7c626SNashe Mncube if (DAGInstrs->ExitSU.getInstr()) 961*21f7c626SNashe Mncube modifyBypasses(DAGInstrs->ExitSU); 962*21f7c626SNashe Mncube } 963*21f7c626SNashe Mncube 964*21f7c626SNashe Mncube std::unique_ptr<ScheduleDAGMutation> 965*21f7c626SNashe Mncube createARMLatencyMutations(const ARMSubtarget &ST, AAResults *AA) { 
966*21f7c626SNashe Mncube if (ST.isCortexM85()) 967*21f7c626SNashe Mncube return std::make_unique<M85Overrides>(ST.getInstrInfo(), AA); 968*21f7c626SNashe Mncube else if (ST.isCortexM7()) 969*21f7c626SNashe Mncube return std::make_unique<CortexM7Overrides>(ST.getInstrInfo(), AA); 970*21f7c626SNashe Mncube else if (ST.isCortexM55()) 971*21f7c626SNashe Mncube return std::make_unique<CortexM55Overrides>(ST.getInstrInfo(), AA); 972*21f7c626SNashe Mncube 973*21f7c626SNashe Mncube return nullptr; 974*21f7c626SNashe Mncube } 975*21f7c626SNashe Mncube 976*21f7c626SNashe Mncube } // end namespace llvm 977