//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>

#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GPR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // FPR128 + its subclasses, plus the D/Q register tuples (DD .. QQQQ).
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.getSize() == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit");

    // Check that the TableGen'ed-like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
    // Now, the content.
    // Check partial mapping.
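    // Note: in RegisterBankInfo terms, a partial mapping ties a contiguous
    // chunk of bits [StartIdx, StartIdx + Length) of a value to one register
    // bank; e.g. PMI_GPR32 maps bits [0, 32) onto the GPR bank.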
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

    // Check value mapping.
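    // A value mapping is a (BreakDown, NumBreakDowns) pair. Every value here
    // breaks down into a single partial mapping (NumBreakDowns == 1); the
    // Offset in the checks below selects which operand's mapping is probed.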
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

    // Check the value mapping for 3-operand instructions where all the
    // operands map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(                                  \
        AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size);   \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && #RBNameDst #Size                       \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && #RBNameSrc #Size                       \
           " Src is incorrectly initialized");                                 \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
           " Src is incorrectly initialized");                                 \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}

unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           unsigned Size) const {
  // What do we do with different sizes? For now, copies are assumed to be
  // of the same size. Other hooks will be introduced for different sizes:
  // * extract cost.
  // * build_sequence cost.

  // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
  // FIXME: This should be deduced from the scheduling model.
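  // The concrete values below are heuristic: they only need to make a
  // cross-bank copy look more expensive than a same-bank one so that the
  // greedy mapper prefers to keep values within a single bank.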
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}

const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT) const {
  switch (RC.getID()) {
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:
  case AArch64::FPR16_loRegClassID:
  case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
  case AArch64::FPR32RegClassID:
  case AArch64::FPR64RegClassID:
  case AArch64::FPR64_loRegClassID:
  case AArch64::FPR128RegClassID:
  case AArch64::FPR128_loRegClassID:
  case AArch64::DDRegClassID:
  case AArch64::DDDRegClassID:
  case AArch64::DDDDRegClassID:
  case AArch64::QQRegClassID:
  case AArch64::QQQRegClassID:
  case AArch64::QQQQRegClassID:
    return getRegBank(AArch64::FPRRegBankID);
  case AArch64::GPR32commonRegClassID:
  case AArch64::GPR32RegClassID:
  case AArch64::GPR32spRegClassID:
  case AArch64::GPR32sponlyRegClassID:
  case AArch64::GPR32argRegClassID:
  case AArch64::GPR32allRegClassID:
  case AArch64::GPR64commonRegClassID:
  case AArch64::GPR64RegClassID:
  case AArch64::GPR64spRegClassID:
  case AArch64::GPR64sponlyRegClassID:
  case AArch64::GPR64argRegClassID:
  case AArch64::GPR64allRegClassID:
  case AArch64::GPR64noipRegClassID:
  case AArch64::GPR64common_and_GPR64noipRegClassID:
  case AArch64::GPR64noip_and_tcGPR64RegClassID:
  case AArch64::tcGPR64RegClassID:
  case AArch64::rtcGPR64RegClassID:
  case AArch64::WSeqPairsClassRegClassID:
  case AArch64::XSeqPairsClassRegClassID:
  case AArch64::MatrixIndexGPR32_8_11RegClassID:
  case AArch64::MatrixIndexGPR32_12_15RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_8_11RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  case AArch64::CCRRegClassID:
    return getRegBank(AArch64::CCRegBankID);
  default:
    llvm_unreachable("Register class not supported");
  }
}

RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // A 32 or 64-bit OR can be mapped to either FPR or GPR
    // for the same cost.
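    // For instance, %res(s64) = G_OR %a, %b can be selected either as an
    // integer ORRXrr or as a SIMD ORR operating on 64-bit vector registers,
    // so both bank assignments are offered as alternatives here.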
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 4,
        /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AArch64RegisterBankInfo::applyMappingImpl(
    const OperandsMapper &OpdMapper) const {
  switch (OpdMapper.getMI().getOpcode()) {
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_LOAD:
    // These IDs must match getInstrAlternativeMappings.
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}

/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
/// having only floating-point operands.
static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return true;
  }
  return false;
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands have a similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of detail, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}

/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
static bool isFPIntrinsic(unsigned ID) {
  // TODO: Add more intrinsics.
  switch (ID) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
    return true;
  }
}

bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID()))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned an
  // FPR based on its inputs.
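  // For example:
  //   %a:fpr(s32) = G_FADD ...
  //   %b:fpr(s32) = G_FMUL ...
  //   %p:_(s32) = G_PHI %a(s32), %bb.1, %b(s32), %bb.2
  // Both incoming values are defined by FP instructions, so it is
  // reasonable to conclude that %p will end up on FPR as well.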
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}

bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const TargetRegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
    // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
        (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RBs are null, both registers are generic;
      // we shouldn't be here.
      assert(DstRB && SrcRB && "Both RegBank were nullptr");
      unsigned Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    [[fallthrough]];
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();

  // Track the size and bank of each register. We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    if (!Ty.isValid())
      continue;
    OpSize[Idx] = Ty.getSizeInBits();

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
        Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // s8 is an exception for G_DUP, which we always want on GPR.
    if (ScalarTy.getSizeInBits() != 8 &&
        (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
         onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use an FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          OpSize[0]);
    break;
  case TargetOpcode::G_LOAD:
    // Loading into the vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blindly mapping every scalar to GPR.
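    // For example:
    //   %v:_(s32) = G_LOAD %addr(p0)
    //   %sum:_(s32) = G_FADD %v, %w
    // Mapping %v to FPR up front avoids a GPR -> FPR copy before the fadd.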
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct use in a FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // onlyDefinesFP().
                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        onlyDefinesFP(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR.
    // Otherwise, everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to an FPR register bank, then make sure
    // that we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else
      OpRegBankIdx[2] = PMI_FirstGPR;

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use FPR unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to an FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16 which
    // doesn't have an exact-size GPR register class. The exception is if the
    // build_vector has all constant operands, which may be better to leave as
    // GPR without copies, so it can be matched in imported patterns.
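    // For instance, %v(<4 x s32>) = G_BUILD_VECTOR of four G_CONSTANTs can
    // often be materialized directly (e.g. as a constant-pool load or a
    // MOVI-style immediate) without first copying each element to FPR.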
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to an FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector; the scalar should be
    // on the FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned ID = MI.getIntrinsicID();
    unsigned Idx = 0;
    if (!isFPIntrinsic(ID))
      break;
    for (const auto &Op : MI.explicit_operands()) {
      if (Op.isReg())
        OpRegBankIdx[Idx] = PMI_FirstFPR;
      ++Idx;
    }
    break;
  }
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    // Source is always floating point and destination is always integer.
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  }

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
      if (!Ty.isValid())
        continue;
      auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(DefaultMappingID, Cost,
                               getOperandsMapping(OpdsMapping), NumOperands);
}