xref: /llvm-project/llvm/lib/Target/ARM/ARMLatencyMutations.cpp (revision 5be43db9b17e7cfc9e987f257221b0926551eb6e)
1*21f7c626SNashe Mncube //===- ARMLatencyMutations.cpp - ARM Latency Mutations --------------------===//
2*21f7c626SNashe Mncube //
3*21f7c626SNashe Mncube // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*21f7c626SNashe Mncube // See https://llvm.org/LICENSE.txt for license information.
5*21f7c626SNashe Mncube // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*21f7c626SNashe Mncube //
7*21f7c626SNashe Mncube //===----------------------------------------------------------------------===//
8*21f7c626SNashe Mncube //
9*21f7c626SNashe Mncube /// \file This file contains the ARM definition DAG scheduling mutations which
10*21f7c626SNashe Mncube /// change inter-instruction latencies
11*21f7c626SNashe Mncube //
12*21f7c626SNashe Mncube //===----------------------------------------------------------------------===//
13*21f7c626SNashe Mncube 
14*21f7c626SNashe Mncube #include "ARMLatencyMutations.h"
15*21f7c626SNashe Mncube #include "ARMSubtarget.h"
16*21f7c626SNashe Mncube #include "Thumb2InstrInfo.h"
17*21f7c626SNashe Mncube #include "llvm/Analysis/AliasAnalysis.h"
18*21f7c626SNashe Mncube #include "llvm/CodeGen/ScheduleDAG.h"
19*21f7c626SNashe Mncube #include "llvm/CodeGen/ScheduleDAGMutation.h"
20*21f7c626SNashe Mncube #include "llvm/CodeGen/TargetInstrInfo.h"
21*21f7c626SNashe Mncube #include <algorithm>
22*21f7c626SNashe Mncube #include <array>
23*21f7c626SNashe Mncube #include <initializer_list>
24*21f7c626SNashe Mncube #include <memory>
25*21f7c626SNashe Mncube 
26*21f7c626SNashe Mncube namespace llvm {
27*21f7c626SNashe Mncube 
28*21f7c626SNashe Mncube namespace {
29*21f7c626SNashe Mncube 
30*21f7c626SNashe Mncube // Precompute information about opcodes to speed up pass
31*21f7c626SNashe Mncube 
// Table of per-opcode properties consulted by the bypass-override logic.
// Indexed by ARM opcode; built once per subtarget-specific subclass.
class InstructionInformation {
protected:
  // One entry per opcode.  The bit-fields are populated by the constructor;
  // the Produces*/Consumes* bits only after markDPProducersConsumers runs.
  struct IInfo {
    bool HasBRegAddr : 1;      // B-side of addr gen is a register
    bool HasBRegAddrShift : 1; // B-side of addr gen has a shift
    bool IsDivide : 1;         // Some form of integer divide
    bool IsInlineShiftALU : 1; // Inline shift+ALU
    bool IsMultiply : 1;       // Some form of integer multiply
    bool IsMVEIntMAC : 1;      // MVE 8/16/32-bit integer MAC operation
    bool IsNonSubwordLoad : 1; // Load which is a word or larger
    bool IsShift : 1;          // Shift operation
    bool IsRev : 1;            // REV operation
    bool ProducesQP : 1;       // Produces a vector register result
    bool ProducesDP : 1;       // Produces a double-precision register result
    bool ProducesSP : 1;       // Produces a single-precision register result
    bool ConsumesQP : 1;       // Consumes a vector register result
    bool ConsumesDP : 1;       // Consumes a double-precision register result
    bool ConsumesSP : 1;       // Consumes a single-precision register result
    unsigned MVEIntMACMatched; // Matched operand type (for MVE)
    unsigned AddressOpMask;    // Mask indicating which operands go into AGU
    IInfo()
        : HasBRegAddr(false), HasBRegAddrShift(false), IsDivide(false),
          IsInlineShiftALU(false), IsMultiply(false), IsMVEIntMAC(false),
          IsNonSubwordLoad(false), IsShift(false), IsRev(false),
          ProducesQP(false), ProducesDP(false), ProducesSP(false),
          ConsumesQP(false), ConsumesDP(false), ConsumesSP(false),
          MVEIntMACMatched(0), AddressOpMask(0) {}
  };
  typedef std::array<IInfo, ARM::INSTRUCTION_LIST_END> IInfoArray;
  IInfoArray Info;

public:
  // Always available information (filled in by the constructor).
  unsigned getAddressOpMask(unsigned Op) { return Info[Op].AddressOpMask; }
  bool hasBRegAddr(unsigned Op) { return Info[Op].HasBRegAddr; }
  bool hasBRegAddrShift(unsigned Op) { return Info[Op].HasBRegAddrShift; }
  bool isDivide(unsigned Op) { return Info[Op].IsDivide; }
  bool isInlineShiftALU(unsigned Op) { return Info[Op].IsInlineShiftALU; }
  bool isMultiply(unsigned Op) { return Info[Op].IsMultiply; }
  bool isMVEIntMAC(unsigned Op) { return Info[Op].IsMVEIntMAC; }
  bool isNonSubwordLoad(unsigned Op) { return Info[Op].IsNonSubwordLoad; }
  bool isRev(unsigned Op) { return Info[Op].IsRev; }
  bool isShift(unsigned Op) { return Info[Op].IsShift; }

  // Information available only after markDPProducersConsumers is called.
  bool producesQP(unsigned Op) { return Info[Op].ProducesQP; }
  bool producesDP(unsigned Op) { return Info[Op].ProducesDP; }
  bool producesSP(unsigned Op) { return Info[Op].ProducesSP; }
  bool consumesQP(unsigned Op) { return Info[Op].ConsumesQP; }
  bool consumesDP(unsigned Op) { return Info[Op].ConsumesDP; }
  bool consumesSP(unsigned Op) { return Info[Op].ConsumesSP; }

  // True when DstOp is the same opcode as SrcOp, or DstOp recorded SrcOp as
  // its matched MVE MAC partner in MVEIntMACMatched.
  bool isMVEIntMACMatched(unsigned SrcOp, unsigned DstOp) {
    return SrcOp == DstOp || Info[DstOp].MVEIntMACMatched == SrcOp;
  }

  InstructionInformation(const ARMBaseInstrInfo *TII);

protected:
  void markDPProducersConsumers(const ARMBaseInstrInfo *TII);
};
93*21f7c626SNashe Mncube 
94*21f7c626SNashe Mncube InstructionInformation::InstructionInformation(const ARMBaseInstrInfo *TII) {
95*21f7c626SNashe Mncube   using namespace ARM;
96*21f7c626SNashe Mncube 
97*21f7c626SNashe Mncube   std::initializer_list<unsigned> hasBRegAddrList = {
98*21f7c626SNashe Mncube       t2LDRs, t2LDRBs, t2LDRHs, t2STRs, t2STRBs, t2STRHs,
99*21f7c626SNashe Mncube       tLDRr,  tLDRBr,  tLDRHr,  tSTRr,  tSTRBr,  tSTRHr,
100*21f7c626SNashe Mncube   };
101*21f7c626SNashe Mncube   for (auto op : hasBRegAddrList) {
102*21f7c626SNashe Mncube     Info[op].HasBRegAddr = true;
103*21f7c626SNashe Mncube   }
104*21f7c626SNashe Mncube 
105*21f7c626SNashe Mncube   std::initializer_list<unsigned> hasBRegAddrShiftList = {
106*21f7c626SNashe Mncube       t2LDRs, t2LDRBs, t2LDRHs, t2STRs, t2STRBs, t2STRHs,
107*21f7c626SNashe Mncube   };
108*21f7c626SNashe Mncube   for (auto op : hasBRegAddrShiftList) {
109*21f7c626SNashe Mncube     Info[op].HasBRegAddrShift = true;
110*21f7c626SNashe Mncube   }
111*21f7c626SNashe Mncube 
112*21f7c626SNashe Mncube   Info[t2SDIV].IsDivide = Info[t2UDIV].IsDivide = true;
113*21f7c626SNashe Mncube 
114*21f7c626SNashe Mncube   std::initializer_list<unsigned> isInlineShiftALUList = {
115*21f7c626SNashe Mncube       t2ADCrs,  t2ADDSrs, t2ADDrs,  t2BICrs, t2EORrs,
116*21f7c626SNashe Mncube       t2ORNrs,  t2RSBSrs, t2RSBrs,  t2SBCrs, t2SUBrs,
117*21f7c626SNashe Mncube       t2SUBSrs, t2CMPrs,  t2CMNzrs, t2TEQrs, t2TSTrs,
118*21f7c626SNashe Mncube   };
119*21f7c626SNashe Mncube   for (auto op : isInlineShiftALUList) {
120*21f7c626SNashe Mncube     Info[op].IsInlineShiftALU = true;
121*21f7c626SNashe Mncube   }
122*21f7c626SNashe Mncube 
123*21f7c626SNashe Mncube   Info[t2SDIV].IsDivide = Info[t2UDIV].IsDivide = true;
124*21f7c626SNashe Mncube 
125*21f7c626SNashe Mncube   std::initializer_list<unsigned> isMultiplyList = {
126*21f7c626SNashe Mncube       t2MUL,    t2MLA,     t2MLS,     t2SMLABB, t2SMLABT,  t2SMLAD,   t2SMLADX,
127*21f7c626SNashe Mncube       t2SMLAL,  t2SMLALBB, t2SMLALBT, t2SMLALD, t2SMLALDX, t2SMLALTB, t2SMLALTT,
128*21f7c626SNashe Mncube       t2SMLATB, t2SMLATT,  t2SMLAWT,  t2SMLSD,  t2SMLSDX,  t2SMLSLD,  t2SMLSLDX,
129*21f7c626SNashe Mncube       t2SMMLA,  t2SMMLAR,  t2SMMLS,   t2SMMLSR, t2SMMUL,   t2SMMULR,  t2SMUAD,
130*21f7c626SNashe Mncube       t2SMUADX, t2SMULBB,  t2SMULBT,  t2SMULL,  t2SMULTB,  t2SMULTT,  t2SMULWT,
131*21f7c626SNashe Mncube       t2SMUSD,  t2SMUSDX,  t2UMAAL,   t2UMLAL,  t2UMULL,   tMUL,
132*21f7c626SNashe Mncube   };
133*21f7c626SNashe Mncube   for (auto op : isMultiplyList) {
134*21f7c626SNashe Mncube     Info[op].IsMultiply = true;
135*21f7c626SNashe Mncube   }
136*21f7c626SNashe Mncube 
137*21f7c626SNashe Mncube   std::initializer_list<unsigned> isMVEIntMACList = {
138*21f7c626SNashe Mncube       MVE_VMLAS_qr_i16,    MVE_VMLAS_qr_i32,    MVE_VMLAS_qr_i8,
139*21f7c626SNashe Mncube       MVE_VMLA_qr_i16,     MVE_VMLA_qr_i32,     MVE_VMLA_qr_i8,
140*21f7c626SNashe Mncube       MVE_VQDMLAH_qrs16,   MVE_VQDMLAH_qrs32,   MVE_VQDMLAH_qrs8,
141*21f7c626SNashe Mncube       MVE_VQDMLASH_qrs16,  MVE_VQDMLASH_qrs32,  MVE_VQDMLASH_qrs8,
142*21f7c626SNashe Mncube       MVE_VQRDMLAH_qrs16,  MVE_VQRDMLAH_qrs32,  MVE_VQRDMLAH_qrs8,
143*21f7c626SNashe Mncube       MVE_VQRDMLASH_qrs16, MVE_VQRDMLASH_qrs32, MVE_VQRDMLASH_qrs8,
144*21f7c626SNashe Mncube       MVE_VQDMLADHXs16,    MVE_VQDMLADHXs32,    MVE_VQDMLADHXs8,
145*21f7c626SNashe Mncube       MVE_VQDMLADHs16,     MVE_VQDMLADHs32,     MVE_VQDMLADHs8,
146*21f7c626SNashe Mncube       MVE_VQDMLSDHXs16,    MVE_VQDMLSDHXs32,    MVE_VQDMLSDHXs8,
147*21f7c626SNashe Mncube       MVE_VQDMLSDHs16,     MVE_VQDMLSDHs32,     MVE_VQDMLSDHs8,
148*21f7c626SNashe Mncube       MVE_VQRDMLADHXs16,   MVE_VQRDMLADHXs32,   MVE_VQRDMLADHXs8,
149*21f7c626SNashe Mncube       MVE_VQRDMLADHs16,    MVE_VQRDMLADHs32,    MVE_VQRDMLADHs8,
150*21f7c626SNashe Mncube       MVE_VQRDMLSDHXs16,   MVE_VQRDMLSDHXs32,   MVE_VQRDMLSDHXs8,
151*21f7c626SNashe Mncube       MVE_VQRDMLSDHs16,    MVE_VQRDMLSDHs32,    MVE_VQRDMLSDHs8,
152*21f7c626SNashe Mncube   };
153*21f7c626SNashe Mncube   for (auto op : isMVEIntMACList) {
154*21f7c626SNashe Mncube     Info[op].IsMVEIntMAC = true;
155*21f7c626SNashe Mncube   }
156*21f7c626SNashe Mncube 
157*21f7c626SNashe Mncube   std::initializer_list<unsigned> isNonSubwordLoadList = {
158*21f7c626SNashe Mncube       t2LDRi12, t2LDRi8,  t2LDR_POST,  t2LDR_PRE,  t2LDRpci,
159*21f7c626SNashe Mncube       t2LDRs,   t2LDRDi8, t2LDRD_POST, t2LDRD_PRE, tLDRi,
160*21f7c626SNashe Mncube       tLDRpci,  tLDRr,    tLDRspi,
161*21f7c626SNashe Mncube   };
162*21f7c626SNashe Mncube   for (auto op : isNonSubwordLoadList) {
163*21f7c626SNashe Mncube     Info[op].IsNonSubwordLoad = true;
164*21f7c626SNashe Mncube   }
165*21f7c626SNashe Mncube 
166*21f7c626SNashe Mncube   std::initializer_list<unsigned> isRevList = {
167*21f7c626SNashe Mncube       t2REV, t2REV16, t2REVSH, t2RBIT, tREV, tREV16, tREVSH,
168*21f7c626SNashe Mncube   };
169*21f7c626SNashe Mncube   for (auto op : isRevList) {
170*21f7c626SNashe Mncube     Info[op].IsRev = true;
171*21f7c626SNashe Mncube   }
172*21f7c626SNashe Mncube 
173*21f7c626SNashe Mncube   std::initializer_list<unsigned> isShiftList = {
174*21f7c626SNashe Mncube       t2ASRri, t2ASRrr, t2LSLri, t2LSLrr, t2LSRri, t2LSRrr, t2RORri, t2RORrr,
175*21f7c626SNashe Mncube       tASRri,  tASRrr,  tLSLSri, tLSLri,  tLSLrr,  tLSRri,  tLSRrr,  tROR,
176*21f7c626SNashe Mncube   };
177*21f7c626SNashe Mncube   for (auto op : isShiftList) {
178*21f7c626SNashe Mncube     Info[op].IsShift = true;
179*21f7c626SNashe Mncube   }
180*21f7c626SNashe Mncube 
181*21f7c626SNashe Mncube   std::initializer_list<unsigned> Address1List = {
182*21f7c626SNashe Mncube       t2LDRBi12,
183*21f7c626SNashe Mncube       t2LDRBi8,
184*21f7c626SNashe Mncube       t2LDRBpci,
185*21f7c626SNashe Mncube       t2LDRBs,
186*21f7c626SNashe Mncube       t2LDRHi12,
187*21f7c626SNashe Mncube       t2LDRHi8,
188*21f7c626SNashe Mncube       t2LDRHpci,
189*21f7c626SNashe Mncube       t2LDRHs,
190*21f7c626SNashe Mncube       t2LDRSBi12,
191*21f7c626SNashe Mncube       t2LDRSBi8,
192*21f7c626SNashe Mncube       t2LDRSBpci,
193*21f7c626SNashe Mncube       t2LDRSBs,
194*21f7c626SNashe Mncube       t2LDRSHi12,
195*21f7c626SNashe Mncube       t2LDRSHi8,
196*21f7c626SNashe Mncube       t2LDRSHpci,
197*21f7c626SNashe Mncube       t2LDRSHs,
198*21f7c626SNashe Mncube       t2LDRi12,
199*21f7c626SNashe Mncube       t2LDRi8,
200*21f7c626SNashe Mncube       t2LDRpci,
201*21f7c626SNashe Mncube       t2LDRs,
202*21f7c626SNashe Mncube       tLDRBi,
203*21f7c626SNashe Mncube       tLDRBr,
204*21f7c626SNashe Mncube       tLDRHi,
205*21f7c626SNashe Mncube       tLDRHr,
206*21f7c626SNashe Mncube       tLDRSB,
207*21f7c626SNashe Mncube       tLDRSH,
208*21f7c626SNashe Mncube       tLDRi,
209*21f7c626SNashe Mncube       tLDRpci,
210*21f7c626SNashe Mncube       tLDRr,
211*21f7c626SNashe Mncube       tLDRspi,
212*21f7c626SNashe Mncube       t2STRBi12,
213*21f7c626SNashe Mncube       t2STRBi8,
214*21f7c626SNashe Mncube       t2STRBs,
215*21f7c626SNashe Mncube       t2STRHi12,
216*21f7c626SNashe Mncube       t2STRHi8,
217*21f7c626SNashe Mncube       t2STRHs,
218*21f7c626SNashe Mncube       t2STRi12,
219*21f7c626SNashe Mncube       t2STRi8,
220*21f7c626SNashe Mncube       t2STRs,
221*21f7c626SNashe Mncube       tSTRBi,
222*21f7c626SNashe Mncube       tSTRBr,
223*21f7c626SNashe Mncube       tSTRHi,
224*21f7c626SNashe Mncube       tSTRHr,
225*21f7c626SNashe Mncube       tSTRi,
226*21f7c626SNashe Mncube       tSTRr,
227*21f7c626SNashe Mncube       tSTRspi,
228*21f7c626SNashe Mncube       VLDRD,
229*21f7c626SNashe Mncube       VLDRH,
230*21f7c626SNashe Mncube       VLDRS,
231*21f7c626SNashe Mncube       VSTRD,
232*21f7c626SNashe Mncube       VSTRH,
233*21f7c626SNashe Mncube       VSTRS,
234*21f7c626SNashe Mncube       MVE_VLD20_16,
235*21f7c626SNashe Mncube       MVE_VLD20_32,
236*21f7c626SNashe Mncube       MVE_VLD20_8,
237*21f7c626SNashe Mncube       MVE_VLD21_16,
238*21f7c626SNashe Mncube       MVE_VLD21_32,
239*21f7c626SNashe Mncube       MVE_VLD21_8,
240*21f7c626SNashe Mncube       MVE_VLD40_16,
241*21f7c626SNashe Mncube       MVE_VLD40_32,
242*21f7c626SNashe Mncube       MVE_VLD40_8,
243*21f7c626SNashe Mncube       MVE_VLD41_16,
244*21f7c626SNashe Mncube       MVE_VLD41_32,
245*21f7c626SNashe Mncube       MVE_VLD41_8,
246*21f7c626SNashe Mncube       MVE_VLD42_16,
247*21f7c626SNashe Mncube       MVE_VLD42_32,
248*21f7c626SNashe Mncube       MVE_VLD42_8,
249*21f7c626SNashe Mncube       MVE_VLD43_16,
250*21f7c626SNashe Mncube       MVE_VLD43_32,
251*21f7c626SNashe Mncube       MVE_VLD43_8,
252*21f7c626SNashe Mncube       MVE_VLDRBS16,
253*21f7c626SNashe Mncube       MVE_VLDRBS16_rq,
254*21f7c626SNashe Mncube       MVE_VLDRBS32,
255*21f7c626SNashe Mncube       MVE_VLDRBS32_rq,
256*21f7c626SNashe Mncube       MVE_VLDRBU16,
257*21f7c626SNashe Mncube       MVE_VLDRBU16_rq,
258*21f7c626SNashe Mncube       MVE_VLDRBU32,
259*21f7c626SNashe Mncube       MVE_VLDRBU32_rq,
260*21f7c626SNashe Mncube       MVE_VLDRBU8,
261*21f7c626SNashe Mncube       MVE_VLDRBU8_rq,
262*21f7c626SNashe Mncube       MVE_VLDRDU64_qi,
263*21f7c626SNashe Mncube       MVE_VLDRDU64_rq,
264*21f7c626SNashe Mncube       MVE_VLDRDU64_rq_u,
265*21f7c626SNashe Mncube       MVE_VLDRHS32,
266*21f7c626SNashe Mncube       MVE_VLDRHS32_rq,
267*21f7c626SNashe Mncube       MVE_VLDRHS32_rq_u,
268*21f7c626SNashe Mncube       MVE_VLDRHU16,
269*21f7c626SNashe Mncube       MVE_VLDRHU16_rq,
270*21f7c626SNashe Mncube       MVE_VLDRHU16_rq_u,
271*21f7c626SNashe Mncube       MVE_VLDRHU32,
272*21f7c626SNashe Mncube       MVE_VLDRHU32_rq,
273*21f7c626SNashe Mncube       MVE_VLDRHU32_rq_u,
274*21f7c626SNashe Mncube       MVE_VLDRWU32,
275*21f7c626SNashe Mncube       MVE_VLDRWU32_qi,
276*21f7c626SNashe Mncube       MVE_VLDRWU32_rq,
277*21f7c626SNashe Mncube       MVE_VLDRWU32_rq_u,
278*21f7c626SNashe Mncube       MVE_VST20_16,
279*21f7c626SNashe Mncube       MVE_VST20_32,
280*21f7c626SNashe Mncube       MVE_VST20_8,
281*21f7c626SNashe Mncube       MVE_VST21_16,
282*21f7c626SNashe Mncube       MVE_VST21_32,
283*21f7c626SNashe Mncube       MVE_VST21_8,
284*21f7c626SNashe Mncube       MVE_VST40_16,
285*21f7c626SNashe Mncube       MVE_VST40_32,
286*21f7c626SNashe Mncube       MVE_VST40_8,
287*21f7c626SNashe Mncube       MVE_VST41_16,
288*21f7c626SNashe Mncube       MVE_VST41_32,
289*21f7c626SNashe Mncube       MVE_VST41_8,
290*21f7c626SNashe Mncube       MVE_VST42_16,
291*21f7c626SNashe Mncube       MVE_VST42_32,
292*21f7c626SNashe Mncube       MVE_VST42_8,
293*21f7c626SNashe Mncube       MVE_VST43_16,
294*21f7c626SNashe Mncube       MVE_VST43_32,
295*21f7c626SNashe Mncube       MVE_VST43_8,
296*21f7c626SNashe Mncube       MVE_VSTRB16,
297*21f7c626SNashe Mncube       MVE_VSTRB16_rq,
298*21f7c626SNashe Mncube       MVE_VSTRB32,
299*21f7c626SNashe Mncube       MVE_VSTRB32_rq,
300*21f7c626SNashe Mncube       MVE_VSTRBU8,
301*21f7c626SNashe Mncube       MVE_VSTRB8_rq,
302*21f7c626SNashe Mncube       MVE_VSTRD64_qi,
303*21f7c626SNashe Mncube       MVE_VSTRD64_rq,
304*21f7c626SNashe Mncube       MVE_VSTRD64_rq_u,
305*21f7c626SNashe Mncube       MVE_VSTRH32,
306*21f7c626SNashe Mncube       MVE_VSTRH32_rq,
307*21f7c626SNashe Mncube       MVE_VSTRH32_rq_u,
308*21f7c626SNashe Mncube       MVE_VSTRHU16,
309*21f7c626SNashe Mncube       MVE_VSTRH16_rq,
310*21f7c626SNashe Mncube       MVE_VSTRH16_rq_u,
311*21f7c626SNashe Mncube       MVE_VSTRWU32,
312*21f7c626SNashe Mncube       MVE_VSTRW32_qi,
313*21f7c626SNashe Mncube       MVE_VSTRW32_rq,
314*21f7c626SNashe Mncube       MVE_VSTRW32_rq_u,
315*21f7c626SNashe Mncube   };
316*21f7c626SNashe Mncube   std::initializer_list<unsigned> Address2List = {
317*21f7c626SNashe Mncube       t2LDRB_POST,
318*21f7c626SNashe Mncube       t2LDRB_PRE,
319*21f7c626SNashe Mncube       t2LDRDi8,
320*21f7c626SNashe Mncube       t2LDRH_POST,
321*21f7c626SNashe Mncube       t2LDRH_PRE,
322*21f7c626SNashe Mncube       t2LDRSB_POST,
323*21f7c626SNashe Mncube       t2LDRSB_PRE,
324*21f7c626SNashe Mncube       t2LDRSH_POST,
325*21f7c626SNashe Mncube       t2LDRSH_PRE,
326*21f7c626SNashe Mncube       t2LDR_POST,
327*21f7c626SNashe Mncube       t2LDR_PRE,
328*21f7c626SNashe Mncube       t2STRB_POST,
329*21f7c626SNashe Mncube       t2STRB_PRE,
330*21f7c626SNashe Mncube       t2STRDi8,
331*21f7c626SNashe Mncube       t2STRH_POST,
332*21f7c626SNashe Mncube       t2STRH_PRE,
333*21f7c626SNashe Mncube       t2STR_POST,
334*21f7c626SNashe Mncube       t2STR_PRE,
335*21f7c626SNashe Mncube       MVE_VLD20_16_wb,
336*21f7c626SNashe Mncube       MVE_VLD20_32_wb,
337*21f7c626SNashe Mncube       MVE_VLD20_8_wb,
338*21f7c626SNashe Mncube       MVE_VLD21_16_wb,
339*21f7c626SNashe Mncube       MVE_VLD21_32_wb,
340*21f7c626SNashe Mncube       MVE_VLD21_8_wb,
341*21f7c626SNashe Mncube       MVE_VLD40_16_wb,
342*21f7c626SNashe Mncube       MVE_VLD40_32_wb,
343*21f7c626SNashe Mncube       MVE_VLD40_8_wb,
344*21f7c626SNashe Mncube       MVE_VLD41_16_wb,
345*21f7c626SNashe Mncube       MVE_VLD41_32_wb,
346*21f7c626SNashe Mncube       MVE_VLD41_8_wb,
347*21f7c626SNashe Mncube       MVE_VLD42_16_wb,
348*21f7c626SNashe Mncube       MVE_VLD42_32_wb,
349*21f7c626SNashe Mncube       MVE_VLD42_8_wb,
350*21f7c626SNashe Mncube       MVE_VLD43_16_wb,
351*21f7c626SNashe Mncube       MVE_VLD43_32_wb,
352*21f7c626SNashe Mncube       MVE_VLD43_8_wb,
353*21f7c626SNashe Mncube       MVE_VLDRBS16_post,
354*21f7c626SNashe Mncube       MVE_VLDRBS16_pre,
355*21f7c626SNashe Mncube       MVE_VLDRBS32_post,
356*21f7c626SNashe Mncube       MVE_VLDRBS32_pre,
357*21f7c626SNashe Mncube       MVE_VLDRBU16_post,
358*21f7c626SNashe Mncube       MVE_VLDRBU16_pre,
359*21f7c626SNashe Mncube       MVE_VLDRBU32_post,
360*21f7c626SNashe Mncube       MVE_VLDRBU32_pre,
361*21f7c626SNashe Mncube       MVE_VLDRBU8_post,
362*21f7c626SNashe Mncube       MVE_VLDRBU8_pre,
363*21f7c626SNashe Mncube       MVE_VLDRDU64_qi_pre,
364*21f7c626SNashe Mncube       MVE_VLDRHS32_post,
365*21f7c626SNashe Mncube       MVE_VLDRHS32_pre,
366*21f7c626SNashe Mncube       MVE_VLDRHU16_post,
367*21f7c626SNashe Mncube       MVE_VLDRHU16_pre,
368*21f7c626SNashe Mncube       MVE_VLDRHU32_post,
369*21f7c626SNashe Mncube       MVE_VLDRHU32_pre,
370*21f7c626SNashe Mncube       MVE_VLDRWU32_post,
371*21f7c626SNashe Mncube       MVE_VLDRWU32_pre,
372*21f7c626SNashe Mncube       MVE_VLDRWU32_qi_pre,
373*21f7c626SNashe Mncube       MVE_VST20_16_wb,
374*21f7c626SNashe Mncube       MVE_VST20_32_wb,
375*21f7c626SNashe Mncube       MVE_VST20_8_wb,
376*21f7c626SNashe Mncube       MVE_VST21_16_wb,
377*21f7c626SNashe Mncube       MVE_VST21_32_wb,
378*21f7c626SNashe Mncube       MVE_VST21_8_wb,
379*21f7c626SNashe Mncube       MVE_VST40_16_wb,
380*21f7c626SNashe Mncube       MVE_VST40_32_wb,
381*21f7c626SNashe Mncube       MVE_VST40_8_wb,
382*21f7c626SNashe Mncube       MVE_VST41_16_wb,
383*21f7c626SNashe Mncube       MVE_VST41_32_wb,
384*21f7c626SNashe Mncube       MVE_VST41_8_wb,
385*21f7c626SNashe Mncube       MVE_VST42_16_wb,
386*21f7c626SNashe Mncube       MVE_VST42_32_wb,
387*21f7c626SNashe Mncube       MVE_VST42_8_wb,
388*21f7c626SNashe Mncube       MVE_VST43_16_wb,
389*21f7c626SNashe Mncube       MVE_VST43_32_wb,
390*21f7c626SNashe Mncube       MVE_VST43_8_wb,
391*21f7c626SNashe Mncube       MVE_VSTRB16_post,
392*21f7c626SNashe Mncube       MVE_VSTRB16_pre,
393*21f7c626SNashe Mncube       MVE_VSTRB32_post,
394*21f7c626SNashe Mncube       MVE_VSTRB32_pre,
395*21f7c626SNashe Mncube       MVE_VSTRBU8_post,
396*21f7c626SNashe Mncube       MVE_VSTRBU8_pre,
397*21f7c626SNashe Mncube       MVE_VSTRD64_qi_pre,
398*21f7c626SNashe Mncube       MVE_VSTRH32_post,
399*21f7c626SNashe Mncube       MVE_VSTRH32_pre,
400*21f7c626SNashe Mncube       MVE_VSTRHU16_post,
401*21f7c626SNashe Mncube       MVE_VSTRHU16_pre,
402*21f7c626SNashe Mncube       MVE_VSTRWU32_post,
403*21f7c626SNashe Mncube       MVE_VSTRWU32_pre,
404*21f7c626SNashe Mncube       MVE_VSTRW32_qi_pre,
405*21f7c626SNashe Mncube   };
406*21f7c626SNashe Mncube   std::initializer_list<unsigned> Address3List = {
407*21f7c626SNashe Mncube       t2LDRD_POST,
408*21f7c626SNashe Mncube       t2LDRD_PRE,
409*21f7c626SNashe Mncube       t2STRD_POST,
410*21f7c626SNashe Mncube       t2STRD_PRE,
411*21f7c626SNashe Mncube   };
412*21f7c626SNashe Mncube   // Compute a mask of which operands are involved in address computation
413*21f7c626SNashe Mncube   for (auto &op : Address1List) {
414*21f7c626SNashe Mncube     Info[op].AddressOpMask = 0x6;
415*21f7c626SNashe Mncube   }
416*21f7c626SNashe Mncube   for (auto &op : Address2List) {
417*21f7c626SNashe Mncube     Info[op].AddressOpMask = 0xc;
418*21f7c626SNashe Mncube   }
419*21f7c626SNashe Mncube   for (auto &op : Address3List) {
420*21f7c626SNashe Mncube     Info[op].AddressOpMask = 0x18;
421*21f7c626SNashe Mncube   }
422*21f7c626SNashe Mncube   for (auto &op : hasBRegAddrShiftList) {
423*21f7c626SNashe Mncube     Info[op].AddressOpMask |= 0x8;
424*21f7c626SNashe Mncube   }
425*21f7c626SNashe Mncube }
426*21f7c626SNashe Mncube 
427*21f7c626SNashe Mncube void InstructionInformation::markDPProducersConsumers(
428*21f7c626SNashe Mncube     const ARMBaseInstrInfo *TII) {
429*21f7c626SNashe Mncube   // Learn about all instructions which have FP source/dest registers
430*21f7c626SNashe Mncube   for (unsigned MI = 0; MI < ARM::INSTRUCTION_LIST_END; ++MI) {
431*21f7c626SNashe Mncube     const MCInstrDesc &MID = TII->get(MI);
432*21f7c626SNashe Mncube     auto Operands = MID.operands();
433*21f7c626SNashe Mncube     for (unsigned OI = 0, OIE = MID.getNumOperands(); OI != OIE; ++OI) {
434*21f7c626SNashe Mncube       bool MarkQP = false, MarkDP = false, MarkSP = false;
435*21f7c626SNashe Mncube       switch (Operands[OI].RegClass) {
436*21f7c626SNashe Mncube       case ARM::MQPRRegClassID:
437*21f7c626SNashe Mncube       case ARM::DPRRegClassID:
438*21f7c626SNashe Mncube       case ARM::DPR_8RegClassID:
439*21f7c626SNashe Mncube       case ARM::DPR_VFP2RegClassID:
440*21f7c626SNashe Mncube       case ARM::DPairRegClassID:
441*21f7c626SNashe Mncube       case ARM::DPairSpcRegClassID:
442*21f7c626SNashe Mncube       case ARM::DQuadRegClassID:
443*21f7c626SNashe Mncube       case ARM::DQuadSpcRegClassID:
444*21f7c626SNashe Mncube       case ARM::DTripleRegClassID:
445*21f7c626SNashe Mncube       case ARM::DTripleSpcRegClassID:
446*21f7c626SNashe Mncube         MarkDP = true;
447*21f7c626SNashe Mncube         break;
448*21f7c626SNashe Mncube       case ARM::QPRRegClassID:
449*21f7c626SNashe Mncube       case ARM::QPR_8RegClassID:
450*21f7c626SNashe Mncube       case ARM::QPR_VFP2RegClassID:
451*21f7c626SNashe Mncube       case ARM::QQPRRegClassID:
452*21f7c626SNashe Mncube       case ARM::QQQQPRRegClassID:
453*21f7c626SNashe Mncube         MarkQP = true;
454*21f7c626SNashe Mncube         break;
455*21f7c626SNashe Mncube       case ARM::SPRRegClassID:
456*21f7c626SNashe Mncube       case ARM::SPR_8RegClassID:
457*21f7c626SNashe Mncube       case ARM::FPWithVPRRegClassID:
458*21f7c626SNashe Mncube         MarkSP = true;
459*21f7c626SNashe Mncube         break;
460*21f7c626SNashe Mncube       default:
461*21f7c626SNashe Mncube         break;
462*21f7c626SNashe Mncube       }
463*21f7c626SNashe Mncube       if (MarkQP) {
464*21f7c626SNashe Mncube         if (OI < MID.getNumDefs())
465*21f7c626SNashe Mncube           Info[MI].ProducesQP = true;
466*21f7c626SNashe Mncube         else
467*21f7c626SNashe Mncube           Info[MI].ConsumesQP = true;
468*21f7c626SNashe Mncube       }
469*21f7c626SNashe Mncube       if (MarkDP) {
470*21f7c626SNashe Mncube         if (OI < MID.getNumDefs())
471*21f7c626SNashe Mncube           Info[MI].ProducesDP = true;
472*21f7c626SNashe Mncube         else
473*21f7c626SNashe Mncube           Info[MI].ConsumesDP = true;
474*21f7c626SNashe Mncube       }
475*21f7c626SNashe Mncube       if (MarkSP) {
476*21f7c626SNashe Mncube         if (OI < MID.getNumDefs())
477*21f7c626SNashe Mncube           Info[MI].ProducesSP = true;
478*21f7c626SNashe Mncube         else
479*21f7c626SNashe Mncube           Info[MI].ConsumesSP = true;
480*21f7c626SNashe Mncube       }
481*21f7c626SNashe Mncube     }
482*21f7c626SNashe Mncube   }
483*21f7c626SNashe Mncube }
484*21f7c626SNashe Mncube 
485*21f7c626SNashe Mncube } // anonymous namespace
486*21f7c626SNashe Mncube 
487*21f7c626SNashe Mncube static bool hasImplicitCPSRUse(const MachineInstr *MI) {
488*21f7c626SNashe Mncube   return MI->getDesc().hasImplicitUseOfPhysReg(ARM::CPSR);
489*21f7c626SNashe Mncube }
490*21f7c626SNashe Mncube 
491*21f7c626SNashe Mncube void ARMOverrideBypasses::setBidirLatencies(SUnit &SrcSU, SDep &SrcDep,
492*21f7c626SNashe Mncube                                             unsigned latency) {
493*21f7c626SNashe Mncube   SDep Reverse = SrcDep;
494*21f7c626SNashe Mncube   Reverse.setSUnit(&SrcSU);
495*21f7c626SNashe Mncube   for (SDep &PDep : SrcDep.getSUnit()->Preds) {
496*21f7c626SNashe Mncube     if (PDep == Reverse) {
497*21f7c626SNashe Mncube       PDep.setLatency(latency);
498*21f7c626SNashe Mncube       SrcDep.getSUnit()->setDepthDirty();
499*21f7c626SNashe Mncube       break;
500*21f7c626SNashe Mncube     }
501*21f7c626SNashe Mncube   }
502*21f7c626SNashe Mncube   SrcDep.setLatency(latency);
503*21f7c626SNashe Mncube   SrcSU.setHeightDirty();
504*21f7c626SNashe Mncube }
505*21f7c626SNashe Mncube 
506*21f7c626SNashe Mncube static bool mismatchedPred(ARMCC::CondCodes a, ARMCC::CondCodes b) {
507*21f7c626SNashe Mncube   return (a & 0xe) != (b & 0xe);
508*21f7c626SNashe Mncube }
509*21f7c626SNashe Mncube 
510*21f7c626SNashe Mncube // Set output dependences to zero latency for processors which can
511*21f7c626SNashe Mncube // simultaneously issue to the same register.  Returns true if a change
512*21f7c626SNashe Mncube // was made.
513*21f7c626SNashe Mncube bool ARMOverrideBypasses::zeroOutputDependences(SUnit &ISU, SDep &Dep) {
514*21f7c626SNashe Mncube   if (Dep.getKind() == SDep::Output) {
515*21f7c626SNashe Mncube     setBidirLatencies(ISU, Dep, 0);
516*21f7c626SNashe Mncube     return true;
517*21f7c626SNashe Mncube   }
518*21f7c626SNashe Mncube   return false;
519*21f7c626SNashe Mncube }
520*21f7c626SNashe Mncube 
521*21f7c626SNashe Mncube // The graph doesn't look inside of bundles to determine their
522*21f7c626SNashe Mncube // scheduling boundaries and reports zero latency into and out of them
523*21f7c626SNashe Mncube // (except for CPSR into the bundle, which has latency 1).
524*21f7c626SNashe Mncube // Make some better scheduling assumptions:
525*21f7c626SNashe Mncube // 1) CPSR uses have zero latency; other uses have incoming latency 1
526*21f7c626SNashe Mncube // 2) CPSR defs retain a latency of zero; others have a latency of 1.
527*21f7c626SNashe Mncube //
528*21f7c626SNashe Mncube // Returns 1 if a use change was made; 2 if a def change was made; 0 otherwise
529*21f7c626SNashe Mncube unsigned ARMOverrideBypasses::makeBundleAssumptions(SUnit &ISU, SDep &Dep) {
530*21f7c626SNashe Mncube 
531*21f7c626SNashe Mncube   SUnit &DepSU = *Dep.getSUnit();
532*21f7c626SNashe Mncube   const MachineInstr *SrcMI = ISU.getInstr();
533*21f7c626SNashe Mncube   unsigned SrcOpcode = SrcMI->getOpcode();
534*21f7c626SNashe Mncube   const MachineInstr *DstMI = DepSU.getInstr();
535*21f7c626SNashe Mncube   unsigned DstOpcode = DstMI->getOpcode();
536*21f7c626SNashe Mncube 
537*21f7c626SNashe Mncube   if (DstOpcode == ARM::BUNDLE && TII->isPredicated(*DstMI)) {
538*21f7c626SNashe Mncube     setBidirLatencies(
539*21f7c626SNashe Mncube         ISU, Dep,
540*21f7c626SNashe Mncube         (Dep.isAssignedRegDep() && Dep.getReg() == ARM::CPSR) ? 0 : 1);
541*21f7c626SNashe Mncube     return 1;
542*21f7c626SNashe Mncube   }
543*21f7c626SNashe Mncube   if (SrcOpcode == ARM::BUNDLE && TII->isPredicated(*SrcMI) &&
544*21f7c626SNashe Mncube       Dep.isAssignedRegDep() && Dep.getReg() != ARM::CPSR) {
545*21f7c626SNashe Mncube     setBidirLatencies(ISU, Dep, 1);
546*21f7c626SNashe Mncube     return 2;
547*21f7c626SNashe Mncube   }
548*21f7c626SNashe Mncube   return 0;
549*21f7c626SNashe Mncube }
550*21f7c626SNashe Mncube 
551*21f7c626SNashe Mncube // Determine whether there is a memory RAW hazard here and set up latency
552*21f7c626SNashe Mncube // accordingly
553*21f7c626SNashe Mncube bool ARMOverrideBypasses::memoryRAWHazard(SUnit &ISU, SDep &Dep,
554*21f7c626SNashe Mncube                                           unsigned latency) {
555*21f7c626SNashe Mncube   if (!Dep.isNormalMemory())
556*21f7c626SNashe Mncube     return false;
557*21f7c626SNashe Mncube   auto &SrcInst = *ISU.getInstr();
558*21f7c626SNashe Mncube   auto &DstInst = *Dep.getSUnit()->getInstr();
559*21f7c626SNashe Mncube   if (!SrcInst.mayStore() || !DstInst.mayLoad())
560*21f7c626SNashe Mncube     return false;
561*21f7c626SNashe Mncube 
562*21f7c626SNashe Mncube   auto SrcMO = *SrcInst.memoperands().begin();
563*21f7c626SNashe Mncube   auto DstMO = *DstInst.memoperands().begin();
564*21f7c626SNashe Mncube   auto SrcVal = SrcMO->getValue();
565*21f7c626SNashe Mncube   auto DstVal = DstMO->getValue();
566*21f7c626SNashe Mncube   auto SrcPseudoVal = SrcMO->getPseudoValue();
567*21f7c626SNashe Mncube   auto DstPseudoVal = DstMO->getPseudoValue();
568*21f7c626SNashe Mncube   if (SrcVal && DstVal && AA->alias(SrcVal, DstVal) == AliasResult::MustAlias &&
569*21f7c626SNashe Mncube       SrcMO->getOffset() == DstMO->getOffset()) {
570*21f7c626SNashe Mncube     setBidirLatencies(ISU, Dep, latency);
571*21f7c626SNashe Mncube     return true;
572*21f7c626SNashe Mncube   } else if (SrcPseudoVal && DstPseudoVal &&
573*21f7c626SNashe Mncube              SrcPseudoVal->kind() == DstPseudoVal->kind() &&
574*21f7c626SNashe Mncube              SrcPseudoVal->kind() == PseudoSourceValue::FixedStack) {
575*21f7c626SNashe Mncube     // Spills/fills
576*21f7c626SNashe Mncube     auto FS0 = cast<FixedStackPseudoSourceValue>(SrcPseudoVal);
577*21f7c626SNashe Mncube     auto FS1 = cast<FixedStackPseudoSourceValue>(DstPseudoVal);
578*21f7c626SNashe Mncube     if (FS0 == FS1) {
579*21f7c626SNashe Mncube       setBidirLatencies(ISU, Dep, latency);
580*21f7c626SNashe Mncube       return true;
581*21f7c626SNashe Mncube     }
582*21f7c626SNashe Mncube   }
583*21f7c626SNashe Mncube   return false;
584*21f7c626SNashe Mncube }
585*21f7c626SNashe Mncube 
586*21f7c626SNashe Mncube namespace {
587*21f7c626SNashe Mncube 
588*21f7c626SNashe Mncube std::unique_ptr<InstructionInformation> II;
589*21f7c626SNashe Mncube 
590*21f7c626SNashe Mncube class CortexM7InstructionInformation : public InstructionInformation {
591*21f7c626SNashe Mncube public:
592*21f7c626SNashe Mncube   CortexM7InstructionInformation(const ARMBaseInstrInfo *TII)
593*21f7c626SNashe Mncube       : InstructionInformation(TII) {}
594*21f7c626SNashe Mncube };
595*21f7c626SNashe Mncube 
596*21f7c626SNashe Mncube class CortexM7Overrides : public ARMOverrideBypasses {
597*21f7c626SNashe Mncube public:
598*21f7c626SNashe Mncube   CortexM7Overrides(const ARMBaseInstrInfo *TII, AAResults *AA)
599*21f7c626SNashe Mncube       : ARMOverrideBypasses(TII, AA) {
600*21f7c626SNashe Mncube     if (!II)
601*21f7c626SNashe Mncube       II.reset(new CortexM7InstructionInformation(TII));
602*21f7c626SNashe Mncube   }
603*21f7c626SNashe Mncube 
604*21f7c626SNashe Mncube   void modifyBypasses(SUnit &) override;
605*21f7c626SNashe Mncube };
606*21f7c626SNashe Mncube 
607*21f7c626SNashe Mncube void CortexM7Overrides::modifyBypasses(SUnit &ISU) {
608*21f7c626SNashe Mncube   const MachineInstr *SrcMI = ISU.getInstr();
609*21f7c626SNashe Mncube   unsigned SrcOpcode = SrcMI->getOpcode();
610*21f7c626SNashe Mncube   bool isNSWload = II->isNonSubwordLoad(SrcOpcode);
611*21f7c626SNashe Mncube 
612*21f7c626SNashe Mncube   // Walk the successors looking for latency overrides that are needed
613*21f7c626SNashe Mncube   for (SDep &Dep : ISU.Succs) {
614*21f7c626SNashe Mncube 
615*21f7c626SNashe Mncube     // Output dependences should have 0 latency, as M7 is able to
616*21f7c626SNashe Mncube     // schedule writers to the same register for simultaneous issue.
617*21f7c626SNashe Mncube     if (zeroOutputDependences(ISU, Dep))
618*21f7c626SNashe Mncube       continue;
619*21f7c626SNashe Mncube 
620*21f7c626SNashe Mncube     if (memoryRAWHazard(ISU, Dep, 4))
621*21f7c626SNashe Mncube       continue;
622*21f7c626SNashe Mncube 
623*21f7c626SNashe Mncube     // Ignore dependencies other than data
624*21f7c626SNashe Mncube     if (Dep.getKind() != SDep::Data)
625*21f7c626SNashe Mncube       continue;
626*21f7c626SNashe Mncube 
627*21f7c626SNashe Mncube     SUnit &DepSU = *Dep.getSUnit();
628*21f7c626SNashe Mncube     if (DepSU.isBoundaryNode())
629*21f7c626SNashe Mncube       continue;
630*21f7c626SNashe Mncube 
631*21f7c626SNashe Mncube     if (makeBundleAssumptions(ISU, Dep) == 1)
632*21f7c626SNashe Mncube       continue;
633*21f7c626SNashe Mncube 
634*21f7c626SNashe Mncube     const MachineInstr *DstMI = DepSU.getInstr();
635*21f7c626SNashe Mncube     unsigned DstOpcode = DstMI->getOpcode();
636*21f7c626SNashe Mncube 
637*21f7c626SNashe Mncube     // Word loads into any multiply or divide instruction are considered
638*21f7c626SNashe Mncube     // cannot bypass their scheduling stage. Didn't do this in the .td file
639*21f7c626SNashe Mncube     // because we cannot easily create a read advance that is 0 from certain
640*21f7c626SNashe Mncube     // writer classes and 1 from all the rest.
641*21f7c626SNashe Mncube     // (The other way around would have been easy.)
642*21f7c626SNashe Mncube     if (isNSWload && (II->isMultiply(DstOpcode) || II->isDivide(DstOpcode)))
643*21f7c626SNashe Mncube       setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);
644*21f7c626SNashe Mncube 
645*21f7c626SNashe Mncube     // Word loads into B operand of a load/store are considered cannot bypass
646*21f7c626SNashe Mncube     // their scheduling stage. Cannot do in the .td file because
647*21f7c626SNashe Mncube     // need to decide between -1 and -2 for ReadAdvance
648*21f7c626SNashe Mncube     if (isNSWload && II->hasBRegAddr(DstOpcode) &&
649*21f7c626SNashe Mncube         DstMI->getOperand(2).getReg() == Dep.getReg())
650*21f7c626SNashe Mncube       setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);
651*21f7c626SNashe Mncube 
652*21f7c626SNashe Mncube     // Multiplies into any address generation cannot bypass from EX3.  Cannot do
653*21f7c626SNashe Mncube     // in the .td file because need to decide between -1 and -2 for ReadAdvance
654*21f7c626SNashe Mncube     if (II->isMultiply(SrcOpcode)) {
655*21f7c626SNashe Mncube       unsigned OpMask = II->getAddressOpMask(DstOpcode) >> 1;
656*21f7c626SNashe Mncube       for (unsigned i = 1; OpMask; ++i, OpMask >>= 1) {
657*21f7c626SNashe Mncube         if ((OpMask & 1) && DstMI->getOperand(i).isReg() &&
658*21f7c626SNashe Mncube             DstMI->getOperand(i).getReg() == Dep.getReg()) {
659*21f7c626SNashe Mncube           setBidirLatencies(ISU, Dep, 4); // first legal bypass is EX4->EX1
660*21f7c626SNashe Mncube           break;
661*21f7c626SNashe Mncube         }
662*21f7c626SNashe Mncube       }
663*21f7c626SNashe Mncube     }
664*21f7c626SNashe Mncube 
665*21f7c626SNashe Mncube     // Mismatched conditional producers take longer on M7; they end up looking
666*21f7c626SNashe Mncube     // like they were produced at EX3 and read at IS.
667*21f7c626SNashe Mncube     if (TII->isPredicated(*SrcMI) && Dep.isAssignedRegDep() &&
668*21f7c626SNashe Mncube         (SrcOpcode == ARM::BUNDLE ||
669*21f7c626SNashe Mncube          mismatchedPred(TII->getPredicate(*SrcMI),
670*21f7c626SNashe Mncube                         TII->getPredicate(*DstMI)))) {
671*21f7c626SNashe Mncube       unsigned Lat = 1;
672*21f7c626SNashe Mncube       // Operand A of shift+ALU is treated as an EX1 read instead of EX2.
673*21f7c626SNashe Mncube       if (II->isInlineShiftALU(DstOpcode) && DstMI->getOperand(3).getImm() &&
674*21f7c626SNashe Mncube           DstMI->getOperand(1).getReg() == Dep.getReg())
675*21f7c626SNashe Mncube         Lat = 2;
676*21f7c626SNashe Mncube       Lat = std::min(3u, Dep.getLatency() + Lat);
677*21f7c626SNashe Mncube       setBidirLatencies(ISU, Dep, std::max(Dep.getLatency(), Lat));
678*21f7c626SNashe Mncube     }
679*21f7c626SNashe Mncube 
680*21f7c626SNashe Mncube     // CC setter into conditional producer shouldn't have a latency of more
681*21f7c626SNashe Mncube     // than 1 unless it's due to an implicit read. (All the "true" readers
682*21f7c626SNashe Mncube     // of the condition code use an implicit read, and predicates use an
683*21f7c626SNashe Mncube     // explicit.)
684*21f7c626SNashe Mncube     if (Dep.isAssignedRegDep() && Dep.getReg() == ARM::CPSR &&
685*21f7c626SNashe Mncube         TII->isPredicated(*DstMI) && !hasImplicitCPSRUse(DstMI))
686*21f7c626SNashe Mncube       setBidirLatencies(ISU, Dep, 1);
687*21f7c626SNashe Mncube 
688*21f7c626SNashe Mncube     // REV instructions cannot bypass directly into the EX1 shifter.  The
689*21f7c626SNashe Mncube     // code is slightly inexact as it doesn't attempt to ensure that the bypass
690*21f7c626SNashe Mncube     // is to the shifter operands.
691*21f7c626SNashe Mncube     if (II->isRev(SrcOpcode)) {
692*21f7c626SNashe Mncube       if (II->isInlineShiftALU(DstOpcode))
693*21f7c626SNashe Mncube         setBidirLatencies(ISU, Dep, 2);
694*21f7c626SNashe Mncube       else if (II->isShift(DstOpcode))
695*21f7c626SNashe Mncube         setBidirLatencies(ISU, Dep, 1);
696*21f7c626SNashe Mncube     }
697*21f7c626SNashe Mncube   }
698*21f7c626SNashe Mncube }
699*21f7c626SNashe Mncube 
700*21f7c626SNashe Mncube class M85InstructionInformation : public InstructionInformation {
701*21f7c626SNashe Mncube public:
702*21f7c626SNashe Mncube   M85InstructionInformation(const ARMBaseInstrInfo *t)
703*21f7c626SNashe Mncube       : InstructionInformation(t) {
704*21f7c626SNashe Mncube     markDPProducersConsumers(t);
705*21f7c626SNashe Mncube   }
706*21f7c626SNashe Mncube };
707*21f7c626SNashe Mncube 
708*21f7c626SNashe Mncube class M85Overrides : public ARMOverrideBypasses {
709*21f7c626SNashe Mncube public:
710*21f7c626SNashe Mncube   M85Overrides(const ARMBaseInstrInfo *t, AAResults *a)
711*21f7c626SNashe Mncube       : ARMOverrideBypasses(t, a) {
712*21f7c626SNashe Mncube     if (!II)
713*21f7c626SNashe Mncube       II.reset(new M85InstructionInformation(t));
714*21f7c626SNashe Mncube   }
715*21f7c626SNashe Mncube 
716*21f7c626SNashe Mncube   void modifyBypasses(SUnit &) override;
717*21f7c626SNashe Mncube 
718*21f7c626SNashe Mncube private:
719*21f7c626SNashe Mncube   unsigned computeBypassStage(const MCSchedClassDesc *SCD);
720*21f7c626SNashe Mncube   signed modifyMixedWidthFP(const MachineInstr *SrcMI,
721*21f7c626SNashe Mncube                             const MachineInstr *DstMI, unsigned RegID,
722*21f7c626SNashe Mncube                             const MCSchedClassDesc *SCD);
723*21f7c626SNashe Mncube };
724*21f7c626SNashe Mncube 
725*21f7c626SNashe Mncube unsigned M85Overrides::computeBypassStage(const MCSchedClassDesc *SCDesc) {
726*21f7c626SNashe Mncube   auto SM = DAG->getSchedModel();
727*21f7c626SNashe Mncube   unsigned DefIdx = 0; // just look for the first output's timing
728*21f7c626SNashe Mncube   if (DefIdx < SCDesc->NumWriteLatencyEntries) {
729*21f7c626SNashe Mncube     // Lookup the definition's write latency in SubtargetInfo.
730*21f7c626SNashe Mncube     const MCWriteLatencyEntry *WLEntry =
731*21f7c626SNashe Mncube         SM->getSubtargetInfo()->getWriteLatencyEntry(SCDesc, DefIdx);
732*21f7c626SNashe Mncube     unsigned Latency = WLEntry->Cycles >= 0 ? WLEntry->Cycles : 1000;
733*21f7c626SNashe Mncube     if (Latency == 4)
734*21f7c626SNashe Mncube       return 2;
735*21f7c626SNashe Mncube     else if (Latency == 5)
736*21f7c626SNashe Mncube       return 3;
737*21f7c626SNashe Mncube     else if (Latency > 3)
738*21f7c626SNashe Mncube       return 3;
739*21f7c626SNashe Mncube     else
740*21f7c626SNashe Mncube       return Latency;
741*21f7c626SNashe Mncube   }
742*21f7c626SNashe Mncube   return 2;
743*21f7c626SNashe Mncube }
744*21f7c626SNashe Mncube 
745*21f7c626SNashe Mncube // Latency changes for bypassing between FP registers of different sizes:
746*21f7c626SNashe Mncube //
747*21f7c626SNashe Mncube // Note that mixed DP/SP are unlikely because of the semantics
748*21f7c626SNashe Mncube // of C.  Mixed MVE/SP are quite common when MVE intrinsics are used.
749*21f7c626SNashe Mncube signed M85Overrides::modifyMixedWidthFP(const MachineInstr *SrcMI,
750*21f7c626SNashe Mncube                                         const MachineInstr *DstMI,
751*21f7c626SNashe Mncube                                         unsigned RegID,
752*21f7c626SNashe Mncube                                         const MCSchedClassDesc *SCD) {
753*21f7c626SNashe Mncube 
754*21f7c626SNashe Mncube   if (!II->producesSP(SrcMI->getOpcode()) &&
755*21f7c626SNashe Mncube       !II->producesDP(SrcMI->getOpcode()) &&
756*21f7c626SNashe Mncube       !II->producesQP(SrcMI->getOpcode()))
757*21f7c626SNashe Mncube     return 0;
758*21f7c626SNashe Mncube 
759*21f7c626SNashe Mncube   if (Register::isVirtualRegister(RegID)) {
760*21f7c626SNashe Mncube     if (II->producesSP(SrcMI->getOpcode()) &&
761*21f7c626SNashe Mncube         II->consumesDP(DstMI->getOpcode())) {
762*21f7c626SNashe Mncube       for (auto &OP : SrcMI->operands())
763*21f7c626SNashe Mncube         if (OP.isReg() && OP.isDef() && OP.getReg() == RegID &&
764*21f7c626SNashe Mncube             OP.getSubReg() == ARM::ssub_1)
765*21f7c626SNashe Mncube           return 5 - computeBypassStage(SCD);
766*21f7c626SNashe Mncube     } else if (II->producesSP(SrcMI->getOpcode()) &&
767*21f7c626SNashe Mncube                II->consumesQP(DstMI->getOpcode())) {
768*21f7c626SNashe Mncube       for (auto &OP : SrcMI->operands())
769*21f7c626SNashe Mncube         if (OP.isReg() && OP.isDef() && OP.getReg() == RegID &&
770*21f7c626SNashe Mncube             (OP.getSubReg() == ARM::ssub_1 || OP.getSubReg() == ARM::ssub_3))
771*21f7c626SNashe Mncube           return 5 - computeBypassStage(SCD) -
772*21f7c626SNashe Mncube                  ((OP.getSubReg() == ARM::ssub_2 ||
773*21f7c626SNashe Mncube                    OP.getSubReg() == ARM::ssub_3)
774*21f7c626SNashe Mncube                       ? 1
775*21f7c626SNashe Mncube                       : 0);
776*21f7c626SNashe Mncube     } else if (II->producesDP(SrcMI->getOpcode()) &&
777*21f7c626SNashe Mncube                II->consumesQP(DstMI->getOpcode())) {
778*21f7c626SNashe Mncube       for (auto &OP : SrcMI->operands())
779*21f7c626SNashe Mncube         if (OP.isReg() && OP.isDef() && OP.getReg() == RegID &&
780*21f7c626SNashe Mncube             OP.getSubReg() == ARM::ssub_1)
781*21f7c626SNashe Mncube           return -1;
782*21f7c626SNashe Mncube     } else if (II->producesDP(SrcMI->getOpcode()) &&
783*21f7c626SNashe Mncube                II->consumesSP(DstMI->getOpcode())) {
784*21f7c626SNashe Mncube       for (auto &OP : DstMI->operands())
785*21f7c626SNashe Mncube         if (OP.isReg() && OP.isUse() && OP.getReg() == RegID &&
786*21f7c626SNashe Mncube             OP.getSubReg() == ARM::ssub_1)
787*21f7c626SNashe Mncube           return 5 - computeBypassStage(SCD);
788*21f7c626SNashe Mncube     } else if (II->producesQP(SrcMI->getOpcode()) &&
789*21f7c626SNashe Mncube                II->consumesSP(DstMI->getOpcode())) {
790*21f7c626SNashe Mncube       for (auto &OP : DstMI->operands())
791*21f7c626SNashe Mncube         if (OP.isReg() && OP.isUse() && OP.getReg() == RegID &&
792*21f7c626SNashe Mncube             (OP.getSubReg() == ARM::ssub_1 || OP.getSubReg() == ARM::ssub_3))
793*21f7c626SNashe Mncube           return 5 - computeBypassStage(SCD) +
794*21f7c626SNashe Mncube                  ((OP.getSubReg() == ARM::ssub_2 ||
795*21f7c626SNashe Mncube                    OP.getSubReg() == ARM::ssub_3)
796*21f7c626SNashe Mncube                       ? 1
797*21f7c626SNashe Mncube                       : 0);
798*21f7c626SNashe Mncube     } else if (II->producesQP(SrcMI->getOpcode()) &&
799*21f7c626SNashe Mncube                II->consumesDP(DstMI->getOpcode())) {
800*21f7c626SNashe Mncube       for (auto &OP : DstMI->operands())
801*21f7c626SNashe Mncube         if (OP.isReg() && OP.isUse() && OP.getReg() == RegID &&
802*21f7c626SNashe Mncube             OP.getSubReg() == ARM::ssub_1)
803*21f7c626SNashe Mncube           return 1;
804*21f7c626SNashe Mncube     }
805*21f7c626SNashe Mncube   } else if (Register::isPhysicalRegister(RegID)) {
806*21f7c626SNashe Mncube     // Note that when the producer is narrower, not all of the producers
807*21f7c626SNashe Mncube     // may be present in the scheduling graph; somewhere earlier in the
808*21f7c626SNashe Mncube     // compiler, an implicit def/use of the aliased full register gets
809*21f7c626SNashe Mncube     // added to the producer, and so only that producer is seen as *the*
810*21f7c626SNashe Mncube     // single producer.  This behavior also has the unfortunate effect of
811*21f7c626SNashe Mncube     // serializing the producers in the compiler's view of things.
812*21f7c626SNashe Mncube     if (II->producesSP(SrcMI->getOpcode()) &&
813*21f7c626SNashe Mncube         II->consumesDP(DstMI->getOpcode())) {
814*21f7c626SNashe Mncube       for (auto &OP : SrcMI->operands())
815*21f7c626SNashe Mncube         if (OP.isReg() && OP.isDef() && OP.getReg() >= ARM::S1 &&
816*21f7c626SNashe Mncube             OP.getReg() <= ARM::S31 && (OP.getReg() - ARM::S0) % 2 &&
817*21f7c626SNashe Mncube             (OP.getReg() == RegID ||
818*21f7c626SNashe Mncube              (OP.getReg() - ARM::S0) / 2 + ARM::D0 == RegID ||
819*21f7c626SNashe Mncube              (OP.getReg() - ARM::S0) / 4 + ARM::Q0 == RegID))
820*21f7c626SNashe Mncube           return 5 - computeBypassStage(SCD);
821*21f7c626SNashe Mncube     } else if (II->producesSP(SrcMI->getOpcode()) &&
822*21f7c626SNashe Mncube                II->consumesQP(DstMI->getOpcode())) {
823*21f7c626SNashe Mncube       for (auto &OP : SrcMI->operands())
824*21f7c626SNashe Mncube         if (OP.isReg() && OP.isDef() && OP.getReg() >= ARM::S1 &&
825*21f7c626SNashe Mncube             OP.getReg() <= ARM::S31 && (OP.getReg() - ARM::S0) % 2 &&
826*21f7c626SNashe Mncube             (OP.getReg() == RegID ||
827*21f7c626SNashe Mncube              (OP.getReg() - ARM::S0) / 2 + ARM::D0 == RegID ||
828*21f7c626SNashe Mncube              (OP.getReg() - ARM::S0) / 4 + ARM::Q0 == RegID))
829*21f7c626SNashe Mncube           return 5 - computeBypassStage(SCD) -
830*21f7c626SNashe Mncube                  (((OP.getReg() - ARM::S0) / 2) % 2 ? 1 : 0);
831*21f7c626SNashe Mncube     } else if (II->producesDP(SrcMI->getOpcode()) &&
832*21f7c626SNashe Mncube                II->consumesQP(DstMI->getOpcode())) {
833*21f7c626SNashe Mncube       for (auto &OP : SrcMI->operands())
834*21f7c626SNashe Mncube         if (OP.isReg() && OP.isDef() && OP.getReg() >= ARM::D0 &&
835*21f7c626SNashe Mncube             OP.getReg() <= ARM::D15 && (OP.getReg() - ARM::D0) % 2 &&
836*21f7c626SNashe Mncube             (OP.getReg() == RegID ||
837*21f7c626SNashe Mncube              (OP.getReg() - ARM::D0) / 2 + ARM::Q0 == RegID))
838*21f7c626SNashe Mncube           return -1;
839*21f7c626SNashe Mncube     } else if (II->producesDP(SrcMI->getOpcode()) &&
840*21f7c626SNashe Mncube                II->consumesSP(DstMI->getOpcode())) {
841*21f7c626SNashe Mncube       if (RegID >= ARM::S1 && RegID <= ARM::S31 && (RegID - ARM::S0) % 2)
842*21f7c626SNashe Mncube         return 5 - computeBypassStage(SCD);
843*21f7c626SNashe Mncube     } else if (II->producesQP(SrcMI->getOpcode()) &&
844*21f7c626SNashe Mncube                II->consumesSP(DstMI->getOpcode())) {
845*21f7c626SNashe Mncube       if (RegID >= ARM::S1 && RegID <= ARM::S31 && (RegID - ARM::S0) % 2)
846*21f7c626SNashe Mncube         return 5 - computeBypassStage(SCD) +
847*21f7c626SNashe Mncube                (((RegID - ARM::S0) / 2) % 2 ? 1 : 0);
848*21f7c626SNashe Mncube     } else if (II->producesQP(SrcMI->getOpcode()) &&
849*21f7c626SNashe Mncube                II->consumesDP(DstMI->getOpcode())) {
850*21f7c626SNashe Mncube       if (RegID >= ARM::D1 && RegID <= ARM::D15 && (RegID - ARM::D0) % 2)
851*21f7c626SNashe Mncube         return 1;
852*21f7c626SNashe Mncube     }
853*21f7c626SNashe Mncube   }
854*21f7c626SNashe Mncube   return 0;
855*21f7c626SNashe Mncube }
856*21f7c626SNashe Mncube 
857*21f7c626SNashe Mncube void M85Overrides::modifyBypasses(SUnit &ISU) {
858*21f7c626SNashe Mncube   const MachineInstr *SrcMI = ISU.getInstr();
859*21f7c626SNashe Mncube   unsigned SrcOpcode = SrcMI->getOpcode();
860*21f7c626SNashe Mncube   bool isNSWload = II->isNonSubwordLoad(SrcOpcode);
861*21f7c626SNashe Mncube 
862*21f7c626SNashe Mncube   // Walk the successors looking for latency overrides that are needed
863*21f7c626SNashe Mncube   for (SDep &Dep : ISU.Succs) {
864*21f7c626SNashe Mncube 
865*21f7c626SNashe Mncube     // Output dependences should have 0 latency, as CortexM85 is able to
866*21f7c626SNashe Mncube     // schedule writers to the same register for simultaneous issue.
867*21f7c626SNashe Mncube     if (zeroOutputDependences(ISU, Dep))
868*21f7c626SNashe Mncube       continue;
869*21f7c626SNashe Mncube 
870*21f7c626SNashe Mncube     if (memoryRAWHazard(ISU, Dep, 3))
871*21f7c626SNashe Mncube       continue;
872*21f7c626SNashe Mncube 
873*21f7c626SNashe Mncube     // Ignore dependencies other than data or strong ordering.
874*21f7c626SNashe Mncube     if (Dep.getKind() != SDep::Data)
875*21f7c626SNashe Mncube       continue;
876*21f7c626SNashe Mncube 
877*21f7c626SNashe Mncube     SUnit &DepSU = *Dep.getSUnit();
878*21f7c626SNashe Mncube     if (DepSU.isBoundaryNode())
879*21f7c626SNashe Mncube       continue;
880*21f7c626SNashe Mncube 
881*21f7c626SNashe Mncube     if (makeBundleAssumptions(ISU, Dep) == 1)
882*21f7c626SNashe Mncube       continue;
883*21f7c626SNashe Mncube 
884*21f7c626SNashe Mncube     const MachineInstr *DstMI = DepSU.getInstr();
885*21f7c626SNashe Mncube     unsigned DstOpcode = DstMI->getOpcode();
886*21f7c626SNashe Mncube 
887*21f7c626SNashe Mncube     // Word loads into B operand of a load/store with cannot bypass their
888*21f7c626SNashe Mncube     // scheduling stage. Cannot do in the .td file because need to decide
889*21f7c626SNashe Mncube     // between -1 and -2 for ReadAdvance
890*21f7c626SNashe Mncube 
891*21f7c626SNashe Mncube     if (isNSWload && II->hasBRegAddrShift(DstOpcode) &&
892*21f7c626SNashe Mncube         DstMI->getOperand(3).getImm() != 0 && // shift operand
893*21f7c626SNashe Mncube         DstMI->getOperand(2).getReg() == Dep.getReg())
894*21f7c626SNashe Mncube       setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);
895*21f7c626SNashe Mncube 
896*21f7c626SNashe Mncube     if (isNSWload && isMVEVectorInstruction(DstMI)) {
897*21f7c626SNashe Mncube       setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);
898*21f7c626SNashe Mncube     }
899*21f7c626SNashe Mncube 
900*21f7c626SNashe Mncube     if (II->isMVEIntMAC(DstOpcode) &&
901*21f7c626SNashe Mncube         II->isMVEIntMACMatched(SrcOpcode, DstOpcode) &&
902*21f7c626SNashe Mncube         DstMI->getOperand(0).isReg() &&
903*21f7c626SNashe Mncube         DstMI->getOperand(0).getReg() == Dep.getReg())
904*21f7c626SNashe Mncube       setBidirLatencies(ISU, Dep, Dep.getLatency() - 1);
905*21f7c626SNashe Mncube 
906*21f7c626SNashe Mncube     // CC setter into conditional producer shouldn't have a latency of more
907*21f7c626SNashe Mncube     // than 0 unless it's due to an implicit read.
908*21f7c626SNashe Mncube     if (Dep.isAssignedRegDep() && Dep.getReg() == ARM::CPSR &&
909*21f7c626SNashe Mncube         TII->isPredicated(*DstMI) && !hasImplicitCPSRUse(DstMI))
910*21f7c626SNashe Mncube       setBidirLatencies(ISU, Dep, 0);
911*21f7c626SNashe Mncube 
912*21f7c626SNashe Mncube     if (signed ALat = modifyMixedWidthFP(SrcMI, DstMI, Dep.getReg(),
913*21f7c626SNashe Mncube                                          DAG->getSchedClass(&ISU)))
914*21f7c626SNashe Mncube       setBidirLatencies(ISU, Dep, std::max(0, signed(Dep.getLatency()) + ALat));
915*21f7c626SNashe Mncube 
916*21f7c626SNashe Mncube     if (II->isRev(SrcOpcode)) {
917*21f7c626SNashe Mncube       if (II->isInlineShiftALU(DstOpcode))
918*21f7c626SNashe Mncube         setBidirLatencies(ISU, Dep, 1);
919*21f7c626SNashe Mncube       else if (II->isShift(DstOpcode))
920*21f7c626SNashe Mncube         setBidirLatencies(ISU, Dep, 1);
921*21f7c626SNashe Mncube     }
922*21f7c626SNashe Mncube   }
923*21f7c626SNashe Mncube }
924*21f7c626SNashe Mncube 
925*21f7c626SNashe Mncube // Add M55 specific overrides for latencies between instructions. Currently it:
926*21f7c626SNashe Mncube //  - Adds an extra cycle latency between MVE VMLAV and scalar instructions.
927*21f7c626SNashe Mncube class CortexM55Overrides : public ARMOverrideBypasses {
928*21f7c626SNashe Mncube public:
929*21f7c626SNashe Mncube   CortexM55Overrides(const ARMBaseInstrInfo *TII, AAResults *AA)
930*21f7c626SNashe Mncube       : ARMOverrideBypasses(TII, AA) {}
931*21f7c626SNashe Mncube 
932*21f7c626SNashe Mncube   void modifyBypasses(SUnit &SU) override {
933*21f7c626SNashe Mncube     MachineInstr *SrcMI = SU.getInstr();
934*21f7c626SNashe Mncube     if (!(SrcMI->getDesc().TSFlags & ARMII::HorizontalReduction))
935*21f7c626SNashe Mncube       return;
936*21f7c626SNashe Mncube 
937*21f7c626SNashe Mncube     for (SDep &Dep : SU.Succs) {
938*21f7c626SNashe Mncube       if (Dep.getKind() != SDep::Data)
939*21f7c626SNashe Mncube         continue;
940*21f7c626SNashe Mncube       SUnit &DepSU = *Dep.getSUnit();
941*21f7c626SNashe Mncube       if (DepSU.isBoundaryNode())
942*21f7c626SNashe Mncube         continue;
943*21f7c626SNashe Mncube       MachineInstr *DstMI = DepSU.getInstr();
944*21f7c626SNashe Mncube 
945*21f7c626SNashe Mncube       if (!isMVEVectorInstruction(DstMI) && !DstMI->mayStore())
946*21f7c626SNashe Mncube         setBidirLatencies(SU, Dep, 3);
947*21f7c626SNashe Mncube     }
948*21f7c626SNashe Mncube   }
949*21f7c626SNashe Mncube };
950*21f7c626SNashe Mncube 
951*21f7c626SNashe Mncube } // end anonymous namespace
952*21f7c626SNashe Mncube 
953*21f7c626SNashe Mncube void ARMOverrideBypasses::apply(ScheduleDAGInstrs *DAGInstrs) {
954*21f7c626SNashe Mncube   DAG = DAGInstrs;
955*21f7c626SNashe Mncube   for (SUnit &ISU : DAGInstrs->SUnits) {
956*21f7c626SNashe Mncube     if (ISU.isBoundaryNode())
957*21f7c626SNashe Mncube       continue;
958*21f7c626SNashe Mncube     modifyBypasses(ISU);
959*21f7c626SNashe Mncube   }
960*21f7c626SNashe Mncube   if (DAGInstrs->ExitSU.getInstr())
961*21f7c626SNashe Mncube     modifyBypasses(DAGInstrs->ExitSU);
962*21f7c626SNashe Mncube }
963*21f7c626SNashe Mncube 
964*21f7c626SNashe Mncube std::unique_ptr<ScheduleDAGMutation>
965*21f7c626SNashe Mncube createARMLatencyMutations(const ARMSubtarget &ST, AAResults *AA) {
966*21f7c626SNashe Mncube   if (ST.isCortexM85())
967*21f7c626SNashe Mncube     return std::make_unique<M85Overrides>(ST.getInstrInfo(), AA);
968*21f7c626SNashe Mncube   else if (ST.isCortexM7())
969*21f7c626SNashe Mncube     return std::make_unique<CortexM7Overrides>(ST.getInstrInfo(), AA);
970*21f7c626SNashe Mncube   else if (ST.isCortexM55())
971*21f7c626SNashe Mncube     return std::make_unique<CortexM55Overrides>(ST.getInstrInfo(), AA);
972*21f7c626SNashe Mncube 
973*21f7c626SNashe Mncube   return nullptr;
974*21f7c626SNashe Mncube }
975*21f7c626SNashe Mncube 
976*21f7c626SNashe Mncube } // end namespace llvm
977