xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
13 
14 #include "AArch64RegisterBankInfo.h"
15 #include "AArch64InstrInfo.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
19 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
20 #include "llvm/CodeGen/GlobalISel/Utils.h"
21 #include "llvm/CodeGen/LowLevelType.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/MachineOperand.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/TargetOpcodes.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSubtargetInfo.h"
29 #include "llvm/IR/IntrinsicsAArch64.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include <algorithm>
32 #include <cassert>
33 
34 #define GET_TARGET_REGBANK_IMPL
35 #include "AArch64GenRegisterBank.inc"
36 
37 // This file will be TableGen'ed at some point.
38 #include "AArch64GenRegisterBankInfo.def"
39 
40 using namespace llvm;
41 
AArch64RegisterBankInfo(const TargetRegisterInfo & TRI)42 AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
43     : AArch64GenRegisterBankInfo() {
44   static llvm::once_flag InitializeRegisterBankFlag;
45 
46   static auto InitializeRegisterBankOnce = [&]() {
47     // We have only one set of register banks, whatever the subtarget
48     // is. Therefore, the initialization of the RegBanks table should be
49     // done only once. Indeed the table of all register banks
50     // (AArch64::RegBanks) is unique in the compiler. At some point, it
51     // will get tablegen'ed and the whole constructor becomes empty.
52 
53     const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
54     (void)RBGPR;
55     assert(&AArch64::GPRRegBank == &RBGPR &&
56            "The order in RegBanks is messed up");
57 
58     const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
59     (void)RBFPR;
60     assert(&AArch64::FPRRegBank == &RBFPR &&
61            "The order in RegBanks is messed up");
62 
63     const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
64     (void)RBCCR;
65     assert(&AArch64::CCRegBank == &RBCCR &&
66            "The order in RegBanks is messed up");
67 
68     // The GPR register bank is fully defined by all the registers in
69     // GR64all + its subclasses.
70     assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
71            "Subclass not added?");
72     assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit");
73 
74     // The FPR register bank is fully defined by all the registers in
75     // GR64all + its subclasses.
76     assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
77            "Subclass not added?");
78     assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
79            "Subclass not added?");
80     assert(RBFPR.getSize() == 512 &&
81            "FPRs should hold up to 512-bit via QQQQ sequence");
82 
83     assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
84            "Class not added?");
85     assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit");
86 
87     // Check that the TableGen'ed like file is in sync we our expectations.
88     // First, the Idx.
89     assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
90                                   {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
91            "PartialMappingIdx's are incorrectly ordered");
92     assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
93                                   {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
94                                    PMI_FPR256, PMI_FPR512}) &&
95            "PartialMappingIdx's are incorrectly ordered");
96 // Now, the content.
97 // Check partial mapping.
98 #define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
99   do {                                                                         \
100     assert(                                                                    \
101         checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
102         #Idx " is incorrectly initialized");                                   \
103   } while (false)
104 
105     CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
106     CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
107     CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
108     CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
109     CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
110     CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
111     CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
112     CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
113     CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);
114 
115 // Check value mapping.
116 #define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
117   do {                                                                         \
118     assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
119                              PartialMappingIdx::PMI_First##RBName, Size,       \
120                              Offset) &&                                        \
121            #RBName #Size " " #Offset " is incorrectly initialized");           \
122   } while (false)
123 
124 #define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)
125 
126     CHECK_VALUEMAP(GPR, 32);
127     CHECK_VALUEMAP(GPR, 64);
128     CHECK_VALUEMAP(GPR, 128);
129     CHECK_VALUEMAP(FPR, 16);
130     CHECK_VALUEMAP(FPR, 32);
131     CHECK_VALUEMAP(FPR, 64);
132     CHECK_VALUEMAP(FPR, 128);
133     CHECK_VALUEMAP(FPR, 256);
134     CHECK_VALUEMAP(FPR, 512);
135 
136 // Check the value mapping for 3-operands instructions where all the operands
137 // map to the same value mapping.
138 #define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
139   do {                                                                         \
140     CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
141     CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
142     CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
143   } while (false)
144 
145     CHECK_VALUEMAP_3OPS(GPR, 32);
146     CHECK_VALUEMAP_3OPS(GPR, 64);
147     CHECK_VALUEMAP_3OPS(GPR, 128);
148     CHECK_VALUEMAP_3OPS(FPR, 32);
149     CHECK_VALUEMAP_3OPS(FPR, 64);
150     CHECK_VALUEMAP_3OPS(FPR, 128);
151     CHECK_VALUEMAP_3OPS(FPR, 256);
152     CHECK_VALUEMAP_3OPS(FPR, 512);
153 
154 #define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
155   do {                                                                         \
156     unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
157     unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
158     (void)PartialMapDstIdx;                                                    \
159     (void)PartialMapSrcIdx;                                                    \
160     const ValueMapping *Map = getCopyMapping(                                  \
161         AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size);  \
162     (void)Map;                                                                 \
163     assert(Map[0].BreakDown ==                                                 \
164                &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
165            Map[0].NumBreakDowns == 1 && #RBNameDst #Size                       \
166            " Dst is incorrectly initialized");                                 \
167     assert(Map[1].BreakDown ==                                                 \
168                &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
169            Map[1].NumBreakDowns == 1 && #RBNameSrc #Size                       \
170            " Src is incorrectly initialized");                                 \
171                                                                                \
172   } while (false)
173 
174     CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
175     CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
176     CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
177     CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
178     CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
179     CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
180     CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
181     CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);
182 
183 #define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
184   do {                                                                         \
185     unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
186     unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
187     (void)PartialMapDstIdx;                                                    \
188     (void)PartialMapSrcIdx;                                                    \
189     const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
190     (void)Map;                                                                 \
191     assert(Map[0].BreakDown ==                                                 \
192                &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
193            Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
194                                         " Dst is incorrectly initialized");    \
195     assert(Map[1].BreakDown ==                                                 \
196                &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
197            Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
198                                         " Src is incorrectly initialized");    \
199                                                                                \
200   } while (false)
201 
202     CHECK_VALUEMAP_FPEXT(32, 16);
203     CHECK_VALUEMAP_FPEXT(64, 16);
204     CHECK_VALUEMAP_FPEXT(64, 32);
205     CHECK_VALUEMAP_FPEXT(128, 64);
206 
207     assert(verify(TRI) && "Invalid register bank information");
208   };
209 
210   llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
211 }
212 
copyCost(const RegisterBank & A,const RegisterBank & B,unsigned Size) const213 unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
214                                            const RegisterBank &B,
215                                            unsigned Size) const {
216   // What do we do with different size?
217   // copy are same size.
218   // Will introduce other hooks for different size:
219   // * extract cost.
220   // * build_sequence cost.
221 
222   // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
223   // FIXME: This should be deduced from the scheduling model.
224   if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
225     // FMOVXDr or FMOVWSr.
226     return 5;
227   if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
228     // FMOVDXr or FMOVSWr.
229     return 4;
230 
231   return RegisterBankInfo::copyCost(A, B, Size);
232 }
233 
234 const RegisterBank &
getRegBankFromRegClass(const TargetRegisterClass & RC,LLT) const235 AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
236                                                 LLT) const {
237   switch (RC.getID()) {
238   case AArch64::FPR8RegClassID:
239   case AArch64::FPR16RegClassID:
240   case AArch64::FPR16_loRegClassID:
241   case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
242   case AArch64::FPR32RegClassID:
243   case AArch64::FPR64RegClassID:
244   case AArch64::FPR64_loRegClassID:
245   case AArch64::FPR128RegClassID:
246   case AArch64::FPR128_loRegClassID:
247   case AArch64::DDRegClassID:
248   case AArch64::DDDRegClassID:
249   case AArch64::DDDDRegClassID:
250   case AArch64::QQRegClassID:
251   case AArch64::QQQRegClassID:
252   case AArch64::QQQQRegClassID:
253     return getRegBank(AArch64::FPRRegBankID);
254   case AArch64::GPR32commonRegClassID:
255   case AArch64::GPR32RegClassID:
256   case AArch64::GPR32spRegClassID:
257   case AArch64::GPR32sponlyRegClassID:
258   case AArch64::GPR32argRegClassID:
259   case AArch64::GPR32allRegClassID:
260   case AArch64::GPR64commonRegClassID:
261   case AArch64::GPR64RegClassID:
262   case AArch64::GPR64spRegClassID:
263   case AArch64::GPR64sponlyRegClassID:
264   case AArch64::GPR64argRegClassID:
265   case AArch64::GPR64allRegClassID:
266   case AArch64::GPR64noipRegClassID:
267   case AArch64::GPR64common_and_GPR64noipRegClassID:
268   case AArch64::GPR64noip_and_tcGPR64RegClassID:
269   case AArch64::tcGPR64RegClassID:
270   case AArch64::rtcGPR64RegClassID:
271   case AArch64::WSeqPairsClassRegClassID:
272   case AArch64::XSeqPairsClassRegClassID:
273     return getRegBank(AArch64::GPRRegBankID);
274   case AArch64::CCRRegClassID:
275     return getRegBank(AArch64::CCRegBankID);
276   default:
277     llvm_unreachable("Register class not supported");
278   }
279 }
280 
281 RegisterBankInfo::InstructionMappings
getInstrAlternativeMappings(const MachineInstr & MI) const282 AArch64RegisterBankInfo::getInstrAlternativeMappings(
283     const MachineInstr &MI) const {
284   const MachineFunction &MF = *MI.getParent()->getParent();
285   const TargetSubtargetInfo &STI = MF.getSubtarget();
286   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
287   const MachineRegisterInfo &MRI = MF.getRegInfo();
288 
289   switch (MI.getOpcode()) {
290   case TargetOpcode::G_OR: {
291     // 32 and 64-bit or can be mapped on either FPR or
292     // GPR for the same cost.
293     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
294     if (Size != 32 && Size != 64)
295       break;
296 
297     // If the instruction has any implicit-defs or uses,
298     // do not mess with it.
299     if (MI.getNumOperands() != 3)
300       break;
301     InstructionMappings AltMappings;
302     const InstructionMapping &GPRMapping = getInstructionMapping(
303         /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
304         /*NumOperands*/ 3);
305     const InstructionMapping &FPRMapping = getInstructionMapping(
306         /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
307         /*NumOperands*/ 3);
308 
309     AltMappings.push_back(&GPRMapping);
310     AltMappings.push_back(&FPRMapping);
311     return AltMappings;
312   }
313   case TargetOpcode::G_BITCAST: {
314     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
315     if (Size != 32 && Size != 64)
316       break;
317 
318     // If the instruction has any implicit-defs or uses,
319     // do not mess with it.
320     if (MI.getNumOperands() != 2)
321       break;
322 
323     InstructionMappings AltMappings;
324     const InstructionMapping &GPRMapping = getInstructionMapping(
325         /*ID*/ 1, /*Cost*/ 1,
326         getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
327         /*NumOperands*/ 2);
328     const InstructionMapping &FPRMapping = getInstructionMapping(
329         /*ID*/ 2, /*Cost*/ 1,
330         getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
331         /*NumOperands*/ 2);
332     const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
333         /*ID*/ 3,
334         /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
335         getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
336         /*NumOperands*/ 2);
337     const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
338         /*ID*/ 3,
339         /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
340         getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
341         /*NumOperands*/ 2);
342 
343     AltMappings.push_back(&GPRMapping);
344     AltMappings.push_back(&FPRMapping);
345     AltMappings.push_back(&GPRToFPRMapping);
346     AltMappings.push_back(&FPRToGPRMapping);
347     return AltMappings;
348   }
349   case TargetOpcode::G_LOAD: {
350     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
351     if (Size != 64)
352       break;
353 
354     // If the instruction has any implicit-defs or uses,
355     // do not mess with it.
356     if (MI.getNumOperands() != 2)
357       break;
358 
359     InstructionMappings AltMappings;
360     const InstructionMapping &GPRMapping = getInstructionMapping(
361         /*ID*/ 1, /*Cost*/ 1,
362         getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
363                             // Addresses are GPR 64-bit.
364                             getValueMapping(PMI_FirstGPR, 64)}),
365         /*NumOperands*/ 2);
366     const InstructionMapping &FPRMapping = getInstructionMapping(
367         /*ID*/ 2, /*Cost*/ 1,
368         getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
369                             // Addresses are GPR 64-bit.
370                             getValueMapping(PMI_FirstGPR, 64)}),
371         /*NumOperands*/ 2);
372 
373     AltMappings.push_back(&GPRMapping);
374     AltMappings.push_back(&FPRMapping);
375     return AltMappings;
376   }
377   default:
378     break;
379   }
380   return RegisterBankInfo::getInstrAlternativeMappings(MI);
381 }
382 
applyMappingImpl(const OperandsMapper & OpdMapper) const383 void AArch64RegisterBankInfo::applyMappingImpl(
384     const OperandsMapper &OpdMapper) const {
385   switch (OpdMapper.getMI().getOpcode()) {
386   case TargetOpcode::G_OR:
387   case TargetOpcode::G_BITCAST:
388   case TargetOpcode::G_LOAD:
389     // Those ID must match getInstrAlternativeMappings.
390     assert((OpdMapper.getInstrMapping().getID() >= 1 &&
391             OpdMapper.getInstrMapping().getID() <= 4) &&
392            "Don't know how to handle that ID");
393     return applyDefaultMapping(OpdMapper);
394   default:
395     llvm_unreachable("Don't know how to handle that operation");
396   }
397 }
398 
399 /// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
400 /// having only floating-point operands.
isPreISelGenericFloatingPointOpcode(unsigned Opc)401 static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
402   switch (Opc) {
403   case TargetOpcode::G_FADD:
404   case TargetOpcode::G_FSUB:
405   case TargetOpcode::G_FMUL:
406   case TargetOpcode::G_FMA:
407   case TargetOpcode::G_FDIV:
408   case TargetOpcode::G_FCONSTANT:
409   case TargetOpcode::G_FPEXT:
410   case TargetOpcode::G_FPTRUNC:
411   case TargetOpcode::G_FCEIL:
412   case TargetOpcode::G_FFLOOR:
413   case TargetOpcode::G_FNEARBYINT:
414   case TargetOpcode::G_FNEG:
415   case TargetOpcode::G_FCOS:
416   case TargetOpcode::G_FSIN:
417   case TargetOpcode::G_FLOG10:
418   case TargetOpcode::G_FLOG:
419   case TargetOpcode::G_FLOG2:
420   case TargetOpcode::G_FSQRT:
421   case TargetOpcode::G_FABS:
422   case TargetOpcode::G_FEXP:
423   case TargetOpcode::G_FRINT:
424   case TargetOpcode::G_INTRINSIC_TRUNC:
425   case TargetOpcode::G_INTRINSIC_ROUND:
426     return true;
427   }
428   return false;
429 }
430 
431 const RegisterBankInfo::InstructionMapping &
getSameKindOfOperandsMapping(const MachineInstr & MI) const432 AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
433     const MachineInstr &MI) const {
434   const unsigned Opc = MI.getOpcode();
435   const MachineFunction &MF = *MI.getParent()->getParent();
436   const MachineRegisterInfo &MRI = MF.getRegInfo();
437 
438   unsigned NumOperands = MI.getNumOperands();
439   assert(NumOperands <= 3 &&
440          "This code is for instructions with 3 or less operands");
441 
442   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
443   unsigned Size = Ty.getSizeInBits();
444   bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
445 
446   PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;
447 
448 #ifndef NDEBUG
449   // Make sure all the operands are using similar size and type.
450   // Should probably be checked by the machine verifier.
451   // This code won't catch cases where the number of lanes is
452   // different between the operands.
453   // If we want to go to that level of details, it is probably
454   // best to check that the types are the same, period.
455   // Currently, we just check that the register banks are the same
456   // for each types.
457   for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
458     LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
459     assert(
460         AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
461             RBIdx, OpTy.getSizeInBits()) ==
462             AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
463         "Operand has incompatible size");
464     bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
465     (void)OpIsFPR;
466     assert(IsFPR == OpIsFPR && "Operand has incompatible type");
467   }
468 #endif // End NDEBUG.
469 
470   return getInstructionMapping(DefaultMappingID, 1,
471                                getValueMapping(RBIdx, Size), NumOperands);
472 }
473 
474 /// \returns true if a given intrinsic \p ID only uses and defines FPRs.
isFPIntrinsic(unsigned ID)475 static bool isFPIntrinsic(unsigned ID) {
476   // TODO: Add more intrinsics.
477   switch (ID) {
478   default:
479     return false;
480   case Intrinsic::aarch64_neon_uaddlv:
481     return true;
482   }
483 }
484 
hasFPConstraints(const MachineInstr & MI,const MachineRegisterInfo & MRI,const TargetRegisterInfo & TRI,unsigned Depth) const485 bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
486                                                const MachineRegisterInfo &MRI,
487                                                const TargetRegisterInfo &TRI,
488                                                unsigned Depth) const {
489   unsigned Op = MI.getOpcode();
490   if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(getIntrinsicID(MI)))
491     return true;
492 
493   // Do we have an explicit floating point instruction?
494   if (isPreISelGenericFloatingPointOpcode(Op))
495     return true;
496 
497   // No. Check if we have a copy-like instruction. If we do, then we could
498   // still be fed by floating point instructions.
499   if (Op != TargetOpcode::COPY && !MI.isPHI() &&
500       !isPreISelGenericOptimizationHint(Op))
501     return false;
502 
503   // Check if we already know the register bank.
504   auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
505   if (RB == &AArch64::FPRRegBank)
506     return true;
507   if (RB == &AArch64::GPRRegBank)
508     return false;
509 
510   // We don't know anything.
511   //
512   // If we have a phi, we may be able to infer that it will be assigned a FPR
513   // based off of its inputs.
514   if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
515     return false;
516 
517   return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
518     return Op.isReg() &&
519            onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
520   });
521 }
522 
onlyUsesFP(const MachineInstr & MI,const MachineRegisterInfo & MRI,const TargetRegisterInfo & TRI,unsigned Depth) const523 bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
524                                          const MachineRegisterInfo &MRI,
525                                          const TargetRegisterInfo &TRI,
526                                          unsigned Depth) const {
527   switch (MI.getOpcode()) {
528   case TargetOpcode::G_FPTOSI:
529   case TargetOpcode::G_FPTOUI:
530   case TargetOpcode::G_FCMP:
531     return true;
532   default:
533     break;
534   }
535   return hasFPConstraints(MI, MRI, TRI, Depth);
536 }
537 
onlyDefinesFP(const MachineInstr & MI,const MachineRegisterInfo & MRI,const TargetRegisterInfo & TRI,unsigned Depth) const538 bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
539                                             const MachineRegisterInfo &MRI,
540                                             const TargetRegisterInfo &TRI,
541                                             unsigned Depth) const {
542   switch (MI.getOpcode()) {
543   case AArch64::G_DUP:
544   case TargetOpcode::G_SITOFP:
545   case TargetOpcode::G_UITOFP:
546   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
547   case TargetOpcode::G_INSERT_VECTOR_ELT:
548   case TargetOpcode::G_BUILD_VECTOR:
549   case TargetOpcode::G_BUILD_VECTOR_TRUNC:
550     return true;
551   default:
552     break;
553   }
554   return hasFPConstraints(MI, MRI, TRI, Depth);
555 }
556 
557 const RegisterBankInfo::InstructionMapping &
getInstrMapping(const MachineInstr & MI) const558 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
559   const unsigned Opc = MI.getOpcode();
560 
561   // Try the default logic for non-generic instructions that are either copies
562   // or already have some operands assigned to banks.
563   if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
564       Opc == TargetOpcode::G_PHI) {
565     const RegisterBankInfo::InstructionMapping &Mapping =
566         getInstrMappingImpl(MI);
567     if (Mapping.isValid())
568       return Mapping;
569   }
570 
571   const MachineFunction &MF = *MI.getParent()->getParent();
572   const MachineRegisterInfo &MRI = MF.getRegInfo();
573   const TargetSubtargetInfo &STI = MF.getSubtarget();
574   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
575 
576   switch (Opc) {
577     // G_{F|S|U}REM are not listed because they are not legal.
578     // Arithmetic ops.
579   case TargetOpcode::G_ADD:
580   case TargetOpcode::G_SUB:
581   case TargetOpcode::G_PTR_ADD:
582   case TargetOpcode::G_MUL:
583   case TargetOpcode::G_SDIV:
584   case TargetOpcode::G_UDIV:
585     // Bitwise ops.
586   case TargetOpcode::G_AND:
587   case TargetOpcode::G_OR:
588   case TargetOpcode::G_XOR:
589     // Floating point ops.
590   case TargetOpcode::G_FADD:
591   case TargetOpcode::G_FSUB:
592   case TargetOpcode::G_FMUL:
593   case TargetOpcode::G_FDIV:
594     return getSameKindOfOperandsMapping(MI);
595   case TargetOpcode::G_FPEXT: {
596     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
597     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
598     return getInstructionMapping(
599         DefaultMappingID, /*Cost*/ 1,
600         getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
601         /*NumOperands*/ 2);
602   }
603     // Shifts.
604   case TargetOpcode::G_SHL:
605   case TargetOpcode::G_LSHR:
606   case TargetOpcode::G_ASHR: {
607     LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
608     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
609     if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
610       return getInstructionMapping(DefaultMappingID, 1,
611                                    &ValMappings[Shift64Imm], 3);
612     return getSameKindOfOperandsMapping(MI);
613   }
614   case TargetOpcode::COPY: {
615     Register DstReg = MI.getOperand(0).getReg();
616     Register SrcReg = MI.getOperand(1).getReg();
617     // Check if one of the register is not a generic register.
618     if ((Register::isPhysicalRegister(DstReg) ||
619          !MRI.getType(DstReg).isValid()) ||
620         (Register::isPhysicalRegister(SrcReg) ||
621          !MRI.getType(SrcReg).isValid())) {
622       const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
623       const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
624       if (!DstRB)
625         DstRB = SrcRB;
626       else if (!SrcRB)
627         SrcRB = DstRB;
628       // If both RB are null that means both registers are generic.
629       // We shouldn't be here.
630       assert(DstRB && SrcRB && "Both RegBank were nullptr");
631       unsigned Size = getSizeInBits(DstReg, MRI, TRI);
632       return getInstructionMapping(
633           DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
634           getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
635           // We only care about the mapping of the destination.
636           /*NumOperands*/ 1);
637     }
638     // Both registers are generic, use G_BITCAST.
639     LLVM_FALLTHROUGH;
640   }
641   case TargetOpcode::G_BITCAST: {
642     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
643     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
644     unsigned Size = DstTy.getSizeInBits();
645     bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
646     bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
647     const RegisterBank &DstRB =
648         DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
649     const RegisterBank &SrcRB =
650         SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
651     return getInstructionMapping(
652         DefaultMappingID, copyCost(DstRB, SrcRB, Size),
653         getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
654         // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  // No fast-path mapping applied above: compute a mapping operand-by-operand.
  unsigned NumOperands = MI.getNumOperands();

  // Track the size and bank of each register.  We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    // Non-register operands (immediates, predicates, ...) are skipped here
    // and again when the final mapping is built below.
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    OpSize[Idx] = Ty.getSizeInBits();

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    // Anything wider than 64 bits also goes to FPR (GPRs are at most 64-bit).
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
        Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    // G_DUP splats a scalar (operand 1) into a vector (operand 0).  The
    // result is always FPR; the scalar source stays on GPR unless it is
    // already known to be FP.
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // s8 is an exception for G_DUP, which we always want on gpr.
    if (ScalarTy.getSizeInBits() != 8 &&
        (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
         onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_TRUNC: {
    // Truncating a scalar s128 can only be done with FPRs (no 128-bit GPRs).
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // Vector conversions keep the default (all-FPR) mapping from above.
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    // Scalar FP -> integer: result on GPR, source on FPR.  Vector versions
    // keep the default all-FPR mapping.
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use a FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    // Operand 1 is the predicate immediate, which has no register bank.
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          OpSize[0]);
    break;
  case TargetOpcode::G_LOAD:
    // Loading in vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      Cost = 2;
    else
      // Check if that load feeds fp instructions.
      // In that case, we want the default mapping to be on FPR
      // instead of blind map every scalar to GPR.
      for (const MachineInstr &UseMI :
           MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
        // If we have at least one direct use in a FP instruction,
        // assume this was a floating point load in the IR.
        // If it was not, we would have had a bitcast before
        // reaching that instruction.
        // Int->FP conversion operations are also captured in onlyDefinesFP().
        if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
          OpRegBankIdx[0] = PMI_FirstFPR;
          break;
        }
      }
    break;
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    // Note: operand 0 of G_STORE is the *stored value* (a use, not a def).
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR. Otherwise,
    // everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    // NumFP can be at most 3 (the result use plus the two source defs), so
    // >= 2 means a majority of the checked values are FP-constrained.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to a FPR register bank, then make sure that
    // we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // The source of G_UNMERGE_VALUES is its last operand; all earlier
    // operands are the resulting pieces.
    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    // Destination vector and source vector must be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else
      OpRegBankIdx[2] = PMI_FirstGPR;

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use fpr unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    // An XSeqPairs register class (GPR64 pairs, e.g. from CASP-style ops)
    // indicates the 128-bit value actually lives in GPRs.
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to a FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16 which
    // doesn't have a exact size gpr register class. The exception is if the
    // build_vector has all constant operands, which may be better to leave as
    // gpr without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    // All-G_CONSTANT sources: keep the default (GPR) mapping.
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to a FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector, the scalar should be on
    // FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned ID = getIntrinsicID(MI);
    unsigned Idx = 0;
    if (!isFPIntrinsic(ID))
      break;
    // Idx is advanced for every explicit operand (including non-register
    // ones, e.g. the intrinsic ID) so it stays in sync with OpRegBankIdx.
    for (const auto &Op : MI.explicit_operands()) {
      if (Op.isReg())
        OpRegBankIdx[Idx] = PMI_FirstFPR;
      ++Idx;
    }
    break;
  }
  }

  // Finally construct the computed mapping.
  // Non-register operands keep a null ValueMapping entry.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(DefaultMappingID, Cost,
                               getOperandsMapping(OpdsMapping), NumOperands);
}
976