//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cassert>

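// Pull in the TableGen'ed definitions of the AArch64 register banks
// (AArch64::GPRRegBank and friends) and their register class coverage data.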
#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
    : AArch64GenRegisterBankInfo() {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GPR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // QQQQ + its subclasses.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.getSize() == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit");

    // Check that the TableGen'ed-like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
    // Now, the content.
    // Check partial mapping.
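    // CHECK_PARTIALMAP verifies that the PartMappings entry for Idx starts at
    // bit ValStartIdx, spans ValLength bits, and lives in register bank RB.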
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

    // Check value mapping.
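    // CHECK_VALUEMAP_IMPL verifies that the value mapping for bank RBName at
    // width Size points at the PMI_<RBName><Size> partial mapping; Offset
    // selects which operand slot of the mapping is checked.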
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

    // Check the value mapping for 3-operand instructions where all the
    // operands map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

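    // A cross-register-bank copy maps to a pair of single-breakdown value
    // mappings: entry 0 for the destination bank, entry 1 for the source bank.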
#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(                                  \
        AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size);   \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && #RBNameDst #Size                       \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && #RBNameSrc #Size                       \
           " Src is incorrectly initialized");                                 \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

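    // G_FPEXT mappings are checked the same way, except the two entries
    // differ in size: a wider FPR destination (entry 0) and a narrower FPR
    // source (entry 1).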
#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
           " Src is incorrectly initialized");                                 \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}

unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           unsigned Size) const {
  // What do we do with different sizes? Copies are assumed to have the same
  // size; other hooks will be introduced for different sizes:
  // * extract cost.
  // * build_sequence cost.

  // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
  // FIXME: This should be deduced from the scheduling model.
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}

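// Each target register class belongs to exactly one bank: the FPR scalar and
// vector classes (including the DD..QQQQ tuple classes) map to FPR, all GPR
// variants map to GPR, and the condition-code class maps to CC.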
const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT) const {
  switch (RC.getID()) {
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:
  case AArch64::FPR16_loRegClassID:
  case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
  case AArch64::FPR32RegClassID:
  case AArch64::FPR64RegClassID:
  case AArch64::FPR64_loRegClassID:
  case AArch64::FPR128RegClassID:
  case AArch64::FPR128_loRegClassID:
  case AArch64::DDRegClassID:
  case AArch64::DDDRegClassID:
  case AArch64::DDDDRegClassID:
  case AArch64::QQRegClassID:
  case AArch64::QQQRegClassID:
  case AArch64::QQQQRegClassID:
    return getRegBank(AArch64::FPRRegBankID);
  case AArch64::GPR32commonRegClassID:
  case AArch64::GPR32RegClassID:
  case AArch64::GPR32spRegClassID:
  case AArch64::GPR32sponlyRegClassID:
  case AArch64::GPR32argRegClassID:
  case AArch64::GPR32allRegClassID:
  case AArch64::GPR64commonRegClassID:
  case AArch64::GPR64RegClassID:
  case AArch64::GPR64spRegClassID:
  case AArch64::GPR64sponlyRegClassID:
  case AArch64::GPR64argRegClassID:
  case AArch64::GPR64allRegClassID:
  case AArch64::GPR64noipRegClassID:
  case AArch64::GPR64common_and_GPR64noipRegClassID:
  case AArch64::GPR64noip_and_tcGPR64RegClassID:
  case AArch64::tcGPR64RegClassID:
  case AArch64::rtcGPR64RegClassID:
  case AArch64::WSeqPairsClassRegClassID:
  case AArch64::XSeqPairsClassRegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  case AArch64::CCRRegClassID:
    return getRegBank(AArch64::CCRegBankID);
  default:
    llvm_unreachable("Register class not supported");
  }
}

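// Some operations, e.g. a 32 or 64-bit G_OR, cost the same on either the GPR
// or the FPR/SIMD unit. Expose each candidate mapping so that RegBankSelect's
// greedy mode can pick the one that minimizes cross-bank copies.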
RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // 32 and 64-bit OR can be mapped on either FPR or
    // GPR for the same cost.
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 4,
        /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

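// Called by RegBankSelect once one of the alternative mappings above has been
// chosen. The alternatives only differ in the banks assigned, so inserting
// the repairing cross-bank copies via applyDefaultMapping is sufficient.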
void AArch64RegisterBankInfo::applyMappingImpl(
    const OperandsMapper &OpdMapper) const {
  switch (OpdMapper.getMI().getOpcode()) {
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_LOAD:
    // These IDs must match getInstrAlternativeMappings.
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}

/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
/// having only floating-point operands.
static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
    return true;
  }
  return false;
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands have a similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of detail, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}

/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
static bool isFPIntrinsic(unsigned ID) {
  // TODO: Add more intrinsics.
  switch (ID) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
    return true;
  }
}

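/// Returns whether \p MI is known to produce or consume floating-point
/// values, looking through copies, optimization hints, and (up to \p Depth
/// levels) the inputs of PHIs.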
bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(getIntrinsicID(MI)))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned an FPR
  // based off of its inputs.
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}

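/// Returns whether \p MI only takes floating-point inputs, either directly
/// (e.g. G_FPTOSI, G_FCMP) or transitively via hasFPConstraints.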
bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FCMP:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

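/// Returns whether \p MI only produces floating-point (or vector) results,
/// either directly (e.g. G_SITOFP, G_BUILD_VECTOR) or transitively via
/// hasFPConstraints.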
bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const TargetRegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

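// The main mapping hook: try the cheap default logic first, then the opcodes
// with statically known mappings, and finally fall back to a per-operand
// guess (vectors on FPR, scalars and pointers on GPR) that the
// opcode-specific cases below fine-tune.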
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
    // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((Register::isPhysicalRegister(DstReg) ||
         !MRI.getType(DstReg).isValid()) ||
        (Register::isPhysicalRegister(SrcReg) ||
         !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RBs are null, both registers are generic.
      // We shouldn't be here.
      assert(DstRB && SrcRB && "Both RegBank were nullptr");
      unsigned Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    LLVM_FALLTHROUGH;
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();

  // Track the size and bank of each register. We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    OpSize[Idx] = Ty.getSizeInBits();

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
        Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // s8 is an exception for G_DUP, which we always want on GPR.
    if (ScalarTy.getSizeInBits() != 8 &&
        (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
         onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use an FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross-register-bank copy, which is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          OpSize[0]);
    break;
  case TargetOpcode::G_LOAD:
    // Loading in the vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co. instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross-bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      Cost = 2;
    else
      // Check if that load feeds fp instructions.
      // In that case, we want the default mapping to be on FPR
      // instead of blindly mapping every scalar to GPR.
      for (const MachineInstr &UseMI :
           MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
        // If we have at least one direct use in a FP instruction,
        // assume this was a floating point load in the IR.
        // If it was not, we would have had a bitcast before
        // reaching that instruction.
        // Int->FP conversion operations are also captured in onlyDefinesFP().
        if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
          OpRegBankIdx[0] = PMI_FirstFPR;
          break;
        }
      }
    break;
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR.
    // Otherwise, everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since the condition is just whatever
    // is in NZCV; it isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to the FPR register bank, then make sure
    // that we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else
      OpRegBankIdx[2] = PMI_FirstGPR;

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use FPR unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to the FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation, or if the source is a type like s16
    // which doesn't have an exact-size GPR register class. The exception is
    // a build_vector with all constant operands, which may be better left on
    // GPR without copies, so that it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to an FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector; the scalar should be
    // on the FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned ID = getIntrinsicID(MI);
    unsigned Idx = 0;
    if (!isFPIntrinsic(ID))
      break;
    for (const auto &Op : MI.explicit_operands()) {
      if (Op.isReg())
        OpRegBankIdx[Idx] = PMI_FirstFPR;
      ++Idx;
    }
    break;
  }
  }

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(DefaultMappingID, Cost,
                               getOperandsMapping(OpdsMapping), NumOperands);
}