//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
#include <cassert>

#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;

static const unsigned CustomMappingID = 1;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GPR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBGPR.getID()) == 128 &&
           "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // QQQQ + its subclasses.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBFPR.getID()) == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(getMaximumSize(RBCCR.getID()) == 32 &&
           "CCR should hold up to 32-bit");

    // Check that the TableGen'ed-like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
    // Now, the content.
    // Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

    // Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

    // Check the value mapping for 3-operand instructions where all the
    // operands map to the same value mapping.
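    // (e.g. a G_FADD, whose destination and both sources all use the same
    // FPR value mapping of the appropriate size).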
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(AArch64::RBNameDst##RegBankID,    \
                                             AArch64::RBNameSrc##RegBankID,    \
                                             TypeSize::getFixed(Size));        \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 &&                                        \
           #RBNameDst #Size " Dst is incorrectly initialized");                \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 &&                                        \
           #RBNameSrc #Size " Src is incorrectly initialized");                \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 &&                                        \
           "FPR" #DstSize " Dst is incorrectly initialized");                  \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 &&                                        \
           "FPR" #SrcSize " Src is incorrectly initialized");                  \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}

unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           const TypeSize Size) const {
  // What do we do with different sizes? Copies are assumed to be of the same
  // size. Other hooks will be introduced for the different-size cases:
  // * extract cost.
  // * build_sequence cost.

  // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
  // FIXME: This should be deduced from the scheduling model.
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}

const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT Ty) const {
  switch (RC.getID()) {
  case AArch64::GPR64sponlyRegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  default:
    return AArch64GenRegisterBankInfo::getRegBankFromRegClass(RC, Ty);
  }
}

RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // 32 and 64-bit OR can be mapped to either FPR or
    // GPR for the same cost.
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 4,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
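    // A plain G_LOAD has exactly two operands: the loaded value and the
    // address; anything else indicates extra implicit operands are present.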
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstGPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstFPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AArch64RegisterBankInfo::applyMappingImpl(
    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_LOAD:
    // These IDs must match getInstrAlternativeMappings.
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    // Extend smaller gpr operands to 32 bits.
    Builder.setInsertPt(*MI.getParent(), MI.getIterator());
    auto Ext = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(2).getReg());
    MRI.setRegBank(Ext.getReg(0), getRegBank(AArch64::GPRRegBankID));
    MI.getOperand(2).setReg(Ext.getReg(0));
    return applyDefaultMapping(OpdMapper);
  }
  case AArch64::G_DUP: {
    // Extend smaller gpr sources to 32 bits.
    assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 &&
           "Expected sources smaller than 32 bits");
    Builder.setInsertPt(*MI.getParent(), MI.getIterator());

    Register ConstReg;
    auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
    if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
      ConstReg =
          Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
    } else {
      ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
                     .getReg(0);
    }
    MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
    MI.getOperand(1).setReg(ConstReg);
    return applyDefaultMapping(OpdMapper);
  }
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  TypeSize Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands are using similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of details, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}

/// \returns true if a given intrinsic only uses and defines FPRs.
static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
                          const MachineInstr &MI) {
  // TODO: Add more intrinsics.
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
  case Intrinsic::aarch64_neon_uaddv:
  case Intrinsic::aarch64_neon_saddv:
  case Intrinsic::aarch64_neon_umaxv:
  case Intrinsic::aarch64_neon_smaxv:
  case Intrinsic::aarch64_neon_uminv:
  case Intrinsic::aarch64_neon_sminv:
  case Intrinsic::aarch64_neon_faddv:
  case Intrinsic::aarch64_neon_fmaxv:
  case Intrinsic::aarch64_neon_fminv:
  case Intrinsic::aarch64_neon_fmaxnmv:
  case Intrinsic::aarch64_neon_fminnmv:
    return true;
  case Intrinsic::aarch64_neon_saddlv: {
    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    return SrcTy.getElementType().getSizeInBits() >= 16 &&
           SrcTy.getElementCount().getFixedValue() >= 4;
  }
  }
}

bool AArch64RegisterBankInfo::isPHIWithFPContraints(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const TargetRegisterInfo &TRI, const unsigned Depth) const {
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                [&](const MachineInstr &UseMI) {
                  if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
                    return true;
                  return isPHIWithFPContraints(UseMI, MRI, TRI, Depth + 1);
                });
}

bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned an
  // FPR based off of its inputs.
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}

bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const TargetRegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return true;
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld4lane:
    case Intrinsic::aarch64_neon_ld4r:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
  // GMemOperation because we also want to match indexed loads.
  auto *MemOp = cast<GMemOperation>(&MI);
  const Value *LdVal = MemOp->getMMO().getValue();
  if (!LdVal)
    return false;

  Type *EltTy = nullptr;
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
    EltTy = GV->getValueType();
    // Look at the first element of the struct to determine the type we are
    // loading.
    while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) {
      if (StructEltTy->getNumElements() == 0)
        break;
      EltTy = StructEltTy->getTypeAtIndex(0U);
    }
    // Look at the first element of the array to determine its type.
    if (isa<ArrayType>(EltTy))
      EltTy = EltTy->getArrayElementType();
  } else {
    // FIXME: grubbing around uses is pretty ugly, but with no more
    // `getPointerElementType` there's not much else we can do.
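    // Scan the users of the loaded pointer for a load or store whose value
    // type tells us what element type is being accessed.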
    for (const auto *LdUser : LdVal->users()) {
      if (isa<LoadInst>(LdUser)) {
        EltTy = LdUser->getType();
        break;
      }
      if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
        EltTy = LdUser->getOperand(0)->getType();
        break;
      }
    }
  }
  return EltTy && EltTy->isFPOrFPVectorTy();
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
  // G_{F|S|U}REM are not listed because they are not legal.
  // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
  // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
        (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RBs are null, that means both registers are generic.
      // We shouldn't be here.
      assert(DstRB && SrcRB && "Both RegBank were nullptr");
      TypeSize Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    [[fallthrough]];
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    TypeSize Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();
  unsigned MappingID = DefaultMappingID;

  // Track the size and bank of each register. We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    if (!Ty.isValid())
      continue;
    OpSize[Idx] = Ty.getSizeInBits().getKnownMinValue();

    // As a top-level guess, vectors (both scalable and non-scalable) go in
    // FPRs, while scalars and pointers go in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector())
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else if (isPreISelGenericFloatingPointOpcode(Opc) ||
             Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // We want to select dup(load) into LD1R.
    if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    // s8 is an exception for G_DUP, which we always want on gpr.
    else if (ScalarTy.getSizeInBits() != 8 &&
             (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
              onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else {
      if (ScalarTy.getSizeInBits() < 32 &&
          getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) {
        // Calls applyMappingImpl().
        MappingID = CustomMappingID;
      }
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
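    // If the source already lives on FPR, keep the whole conversion on FPR;
    // otherwise assume a GPR source feeding an FPR result.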
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use an FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          TypeSize::getFixed(OpSize[0]));
    break;
  case TargetOpcode::G_LOAD: {
    // Loading into the vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }

    // Try to guess the type of the load from the MMO.
    if (isLoadFromFPType(MI)) {
      OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blindly mapping every scalar to GPR.
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct or indirect use
                 // in an FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // onlyDefinesFP().

                 if (isPHIWithFPContraints(UseMI, MRI, TRI))
                   return true;

                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        onlyDefinesFP(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
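    // If the stored value comes from an instruction that only produces FP
    // values, keep it on FPR to avoid a cross-bank copy before the store.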
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_STORE:
    if (OpRegBankIdx[1] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(1).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[1] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_SEXTLOAD:
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
    // These should always be GPR.
    OpRegBankIdx[0] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INDEXED_LOAD: {
    if (isLoadFromFPType(MI))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR.
    // Otherwise, everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to an FPR register bank, then make sure
    // that we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) ==
        &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else {
      // If the type is i8/i16 and the regbank will be GPR, then we change the
      // type to i32 in applyMappingImpl.
      LLT Ty = MRI.getType(MI.getOperand(2).getReg());
      if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) {
        // Calls applyMappingImpl().
        MappingID = CustomMappingID;
      }
      OpRegBankIdx[2] = PMI_FirstGPR;
    }

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use fpr unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to an FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation, or if it's a type like s16 which
    // doesn't have an exact-size GPR register class. The exception is if the
    // build_vector has all constant operands, which may be better to leave as
    // gpr without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to an FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector; the scalar should be
    // on the FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned Idx = 0;
    if (onlyDefinesFP(MI, MRI, TRI))
      for (const auto &Op : MI.defs()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    else
      Idx += MI.getNumExplicitDefs();

    if (onlyUsesFP(MI, MRI, TRI))
      for (const auto &Op : MI.explicit_uses()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    break;
  }
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    // The source is always floating point and the destination is always
    // integer.
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  }

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
      if (!Ty.isValid())
        continue;
      auto Mapping =
          getValueMapping(OpRegBankIdx[Idx], TypeSize::getFixed(OpSize[Idx]));
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(MappingID, Cost, getOperandsMapping(OpdsMapping),
                               NumOperands);
}