//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cassert>

#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GPR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBGPR.getID()) == 128 &&
           "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // FPR64 + its subclasses and the vector tuple (QQ*) classes.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBFPR.getID()) == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(getMaximumSize(RBCCR.getID()) == 32 &&
           "CCR should hold up to 32-bit");

    // Check that the TableGen'ed-like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
    // Now, the content.
    // Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

    // Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

    // Check the value mapping for 3-operand instructions where all the
    // operands map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(AArch64::RBNameDst##RegBankID,    \
                                             AArch64::RBNameSrc##RegBankID,    \
                                             TypeSize::getFixed(Size));        \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 &&                                        \
           #RBNameDst #Size " Dst is incorrectly initialized");                \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 &&                                        \
           #RBNameSrc #Size " Src is incorrectly initialized");                \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
                                        " Dst is incorrectly initialized");    \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
                                        " Src is incorrectly initialized");    \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}

unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           const TypeSize Size) const {
  // What do we do with different sizes? Copies are the same size.
  // Other hooks will be introduced for different sizes:
  // * extract cost.
  // * build_sequence cost.

  // Copies between GPR and FPR involve FMOV.
  // FIXME: This should be deduced from the scheduling model.
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}

const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT) const {
  switch (RC.getID()) {
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:
  case AArch64::FPR16_loRegClassID:
  case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
  case AArch64::FPR32RegClassID:
  case AArch64::FPR64RegClassID:
  case AArch64::FPR128RegClassID:
  case AArch64::FPR64_loRegClassID:
  case AArch64::FPR128_loRegClassID:
  case AArch64::FPR128_0to7RegClassID:
  case AArch64::DDRegClassID:
  case AArch64::DDDRegClassID:
  case AArch64::DDDDRegClassID:
  case AArch64::QQRegClassID:
  case AArch64::QQQRegClassID:
  case AArch64::QQQQRegClassID:
  case AArch64::ZPRRegClassID:
  case AArch64::ZPR_3bRegClassID:
    return getRegBank(AArch64::FPRRegBankID);
  case AArch64::GPR32commonRegClassID:
  case AArch64::GPR32RegClassID:
  case AArch64::GPR32spRegClassID:
  case AArch64::GPR32sponlyRegClassID:
  case AArch64::GPR32argRegClassID:
  case AArch64::GPR32allRegClassID:
  case AArch64::GPR64commonRegClassID:
  case AArch64::GPR64RegClassID:
  case AArch64::GPR64spRegClassID:
  case AArch64::GPR64sponlyRegClassID:
  case AArch64::GPR64argRegClassID:
  case AArch64::GPR64allRegClassID:
  case AArch64::GPR64noipRegClassID:
  case AArch64::GPR64common_and_GPR64noipRegClassID:
  case AArch64::GPR64noip_and_tcGPR64RegClassID:
  case AArch64::tcGPR64RegClassID:
  case AArch64::tcGPRx16x17RegClassID:
  case AArch64::tcGPRx17RegClassID:
  case AArch64::tcGPRnotx16RegClassID:
  case AArch64::WSeqPairsClassRegClassID:
  case AArch64::XSeqPairsClassRegClassID:
  case AArch64::MatrixIndexGPR32_8_11RegClassID:
  case AArch64::MatrixIndexGPR32_12_15RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_8_11RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  case AArch64::CCRRegClassID:
    return getRegBank(AArch64::CCRegBankID);
  default:
    llvm_unreachable("Register class not supported");
  }
}

RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // 32 and 64-bit OR can be mapped to either FPR or GPR for the same cost.
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
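    // A plain G_OR has exactly one def and two uses, so any other operand
    // count means implicit operands are present.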
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 4,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstGPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstFPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AArch64RegisterBankInfo::applyMappingImpl(
    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_LOAD:
    // These IDs must match getInstrAlternativeMappings.
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    // Extend smaller gpr operands to 32 bit.
    Builder.setInsertPt(*MI.getParent(), MI.getIterator());
    auto Ext = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(2).getReg());
    MRI.setRegBank(Ext.getReg(0), getRegBank(AArch64::GPRRegBankID));
    MI.getOperand(2).setReg(Ext.getReg(0));
    return applyDefaultMapping(OpdMapper);
  }
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  TypeSize Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands have a similar size and type.
  // This should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes differs between the
  // operands. If we want to go to that level of detail, it is probably best
  // to check that the types are the same, period.
  // Currently, we just check that the register banks are the same for each
  // type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}

/// \returns true if a given intrinsic only uses and defines FPRs.
static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
                          const MachineInstr &MI) {
  // TODO: Add more intrinsics.
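  // Note: the across-lanes reductions below read their vector inputs from, and
  // write their scalar results to, SIMD&FP registers even when the result type
  // is an integer, which is why they are treated as FP intrinsics here.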
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
  case Intrinsic::aarch64_neon_uaddv:
  case Intrinsic::aarch64_neon_saddv:
  case Intrinsic::aarch64_neon_umaxv:
  case Intrinsic::aarch64_neon_smaxv:
  case Intrinsic::aarch64_neon_uminv:
  case Intrinsic::aarch64_neon_sminv:
  case Intrinsic::aarch64_neon_faddv:
  case Intrinsic::aarch64_neon_fmaxv:
  case Intrinsic::aarch64_neon_fminv:
  case Intrinsic::aarch64_neon_fmaxnmv:
  case Intrinsic::aarch64_neon_fminnmv:
    return true;
  case Intrinsic::aarch64_neon_saddlv: {
    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    return SrcTy.getElementType().getSizeInBits() >= 16 &&
           SrcTy.getElementCount().getFixedValue() >= 4;
  }
  }
}

bool AArch64RegisterBankInfo::isPHIWithFPContraints(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const TargetRegisterInfo &TRI, const unsigned Depth) const {
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                [&](const MachineInstr &UseMI) {
                  if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
                    return true;
                  return isPHIWithFPContraints(UseMI, MRI, TRI, Depth + 1);
                });
}

bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned an FPR
  // based off of its inputs.
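  // MaxFPRSearchDepth bounds the recursive search through PHIs and copies so
  // that computing the mapping stays cheap.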
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}

bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const TargetRegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return true;
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld4lane:
    case Intrinsic::aarch64_neon_ld4r:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
  // GMemOperation because we also want to match indexed loads.
  auto *MemOp = cast<GMemOperation>(&MI);
  const Value *LdVal = MemOp->getMMO().getValue();
  if (!LdVal)
    return false;

  Type *EltTy = nullptr;
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
    EltTy = GV->getValueType();
    // Look at the first element of the struct to determine the type we are
    // loading
    while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) {
      if (StructEltTy->getNumElements() == 0)
        break;
      EltTy = StructEltTy->getTypeAtIndex(0U);
    }
    // Look at the first element of the array to determine its type
    if (isa<ArrayType>(EltTy))
      EltTy = EltTy->getArrayElementType();
  } else {
    // FIXME: grubbing around uses is pretty ugly, but with no more
    // `getPointerElementType` there's not much else we can do.
    for (const auto *LdUser : LdVal->users()) {
      if (isa<LoadInst>(LdUser)) {
        EltTy = LdUser->getType();
        break;
      }
      if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
        EltTy = LdUser->getOperand(0)->getType();
        break;
      }
    }
  }
  return EltTy && EltTy->isFPOrFPVectorTy();
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
    // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
        (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RBs are null, both registers are generic and we shouldn't be
      // here.
      assert(DstRB && SrcRB && "Both RegBanks were nullptr");
      TypeSize Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    [[fallthrough]];
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    TypeSize Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();
  unsigned MappingID = DefaultMappingID;

  // Track the size and bank of each register. We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    if (!Ty.isValid())
      continue;
    OpSize[Idx] = Ty.getSizeInBits().getKnownMinValue();

    // As a top-level guess, vectors (both scalable and fixed-length) go in
    // FPRs, while scalars and pointers go in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector())
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else if (isPreISelGenericFloatingPointOpcode(Opc) ||
             Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // We want to select dup(load) into LD1R.
    if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    // s8 is an exception for G_DUP, which we always want on GPR.
    else if (ScalarTy.getSizeInBits() != 8 &&
             (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
              onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use an FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross-register-bank copy, which is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          TypeSize::getFixed(OpSize[0]));
    break;
  case TargetOpcode::G_LOAD: {
    // Loading in the vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }

    // Try to guess the type of the load from the MMO.
    if (isLoadFromFPType(MI)) {
      OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blindly mapping every scalar to GPR.
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct or indirect use
                 // in an FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // onlyDefinesFP().

                 if (isPHIWithFPContraints(UseMI, MRI, TRI))
                   return true;

                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        onlyDefinesFP(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_STORE:
    if (OpRegBankIdx[1] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(1).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[1] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_SEXTLOAD:
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
    // These should always be GPR.
    OpRegBankIdx[0] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INDEXED_LOAD: {
    if (isLoadFromFPType(MI))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR.
    // Otherwise, everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to an FPR register bank, then make sure
    // that we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else {
      // If the type is i8/i16, and the regbank will be GPR, then we change the
      // type to i32 in applyMappingImpl.
      LLT Ty = MRI.getType(MI.getOperand(2).getReg());
      if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16)
        MappingID = 1;
      OpRegBankIdx[2] = PMI_FirstGPR;
    }

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use FPR unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to an FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16 which
    // doesn't have an exact-size GPR register class. The exception is if the
    // build_vector has all constant operands, which may be better to leave as
    // gpr without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to a FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector; the scalar should be
    // on the FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned Idx = 0;
    if (onlyDefinesFP(MI, MRI, TRI))
      for (const auto &Op : MI.defs()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    else
      Idx += MI.getNumExplicitDefs();

    if (onlyUsesFP(MI, MRI, TRI))
      for (const auto &Op : MI.explicit_uses()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    break;
  }
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    // The source is always floating point and the destination is always
    // integer.
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  }

  // Finally construct the computed mapping.
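  // Operands that are not registers (or have no LLT) keep a null ValueMapping
  // and are left untouched when the mapping is applied.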
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
      if (!Ty.isValid())
        continue;
      auto Mapping =
          getValueMapping(OpRegBankIdx[Idx], TypeSize::getFixed(OpSize[Idx]));
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(MappingID, Cost, getOperandsMapping(OpdsMapping),
                               NumOperands);
}