//===---- LoongArchMergeBaseOffset.cpp - Optimise address calculations ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Merge the offset of address calculation into the offset field
// of instructions in a global address lowering sequence.
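//
// e.g. (small/medium code model):
//
//   pcalau12i vreg1, %pc_hi20(s)
//   addi.d    vreg2, vreg1, %pc_lo12(s)
//   addi.d    vreg3, vreg2, 8
//
// =>
//
//   pcalau12i vreg1, %pc_hi20(s+8)
//   addi.d    vreg2, vreg1, %pc_lo12(s+8)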
//
//===----------------------------------------------------------------------===//

#include "LoongArch.h"
#include "LoongArchTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "loongarch-merge-base-offset"
#define LoongArch_MERGE_BASE_OFFSET_NAME "LoongArch Merge Base Offset"

namespace {

class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
  const LoongArchSubtarget *ST = nullptr;
  MachineRegisterInfo *MRI;

public:
  static char ID;
  bool runOnMachineFunction(MachineFunction &Fn) override;
  bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
                      MachineInstr *&Lo20, MachineInstr *&Hi12,
                      MachineInstr *&Last);

  bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
                           MachineInstr *&Lo20, MachineInstr *&Hi12,
                           MachineInstr *&Last);
  void foldOffset(MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
                  MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
                  int64_t Offset);
  bool foldLargeOffset(MachineInstr &Hi20, MachineInstr &Lo12,
                       MachineInstr *&Lo20, MachineInstr *&Hi12,
                       MachineInstr *&Last, MachineInstr &TailAdd,
                       Register GAReg);

  bool foldIntoMemoryOps(MachineInstr &Hi20, MachineInstr &Lo12,
                         MachineInstr *&Lo20, MachineInstr *&Hi12,
                         MachineInstr *&Last);

  LoongArchMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::IsSSA);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return LoongArch_MERGE_BASE_OFFSET_NAME;
  }
};
} // end anonymous namespace

char LoongArchMergeBaseOffsetOpt::ID = 0;
INITIALIZE_PASS(LoongArchMergeBaseOffsetOpt, DEBUG_TYPE,
                LoongArch_MERGE_BASE_OFFSET_NAME, false, false)

// Detect either of the patterns:
//
// 1. (small/medium):
//   pcalau12i vreg1, %pc_hi20(s)
//   addi.d    vreg2, vreg1, %pc_lo12(s)
//
// 2. (large):
//   pcalau12i vreg1, %pc_hi20(s)
//   addi.d    vreg2, $zero, %pc_lo12(s)
//   lu32i.d   vreg3, vreg2, %pc64_lo20(s)
//   lu52i.d   vreg4, vreg3, %pc64_hi12(s)
//   add.d     vreg5, vreg4, vreg1

// The pattern is only accepted if:
//   1) For the small/medium pattern, the first instruction has only one use,
//      which is the ADDI.
//   2) For the large pattern, the first four instructions each have only one
//      use, and the user of the fourth instruction is the ADD.
//   3) The address operands have the appropriate type, reflecting the
//      lowering of a global address or constant pool using the pattern.
//   4) The offset value in the Global Address or Constant Pool is 0.
bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
                                                 MachineInstr *&Lo12,
                                                 MachineInstr *&Lo20,
                                                 MachineInstr *&Hi12,
                                                 MachineInstr *&Last) {
  if (Hi20.getOpcode() != LoongArch::PCALAU12I)
    return false;

  const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
  if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_PCREL_HI)
    return false;

  auto isGlobalOrCPIOrBlockAddress = [](const MachineOperand &Op) {
    return Op.isGlobal() || Op.isCPI() || Op.isBlockAddress();
  };

  if (!isGlobalOrCPIOrBlockAddress(Hi20Op1) || Hi20Op1.getOffset() != 0)
    return false;

  Register HiDestReg = Hi20.getOperand(0).getReg();
  if (!MRI->hasOneUse(HiDestReg))
    return false;

  MachineInstr *UseInst = &*MRI->use_instr_begin(HiDestReg);
  if (UseInst->getOpcode() != LoongArch::ADD_D) {
    Lo12 = UseInst;
    if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
        (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
      return false;
  } else {
    assert(ST->is64Bit());
    Last = UseInst;

    Register LastOp1Reg = Last->getOperand(1).getReg();
    if (!LastOp1Reg.isVirtual())
      return false;
    Hi12 = MRI->getVRegDef(LastOp1Reg);
    const MachineOperand &Hi12Op2 = Hi12->getOperand(2);
    if (Hi12Op2.getTargetFlags() != LoongArchII::MO_PCREL64_HI)
      return false;
    if (!isGlobalOrCPIOrBlockAddress(Hi12Op2) || Hi12Op2.getOffset() != 0)
      return false;
    if (!MRI->hasOneUse(Hi12->getOperand(0).getReg()))
      return false;

    Lo20 = MRI->getVRegDef(Hi12->getOperand(1).getReg());
    const MachineOperand &Lo20Op2 = Lo20->getOperand(2);
    if (Lo20Op2.getTargetFlags() != LoongArchII::MO_PCREL64_LO)
      return false;
    if (!isGlobalOrCPIOrBlockAddress(Lo20Op2) || Lo20Op2.getOffset() != 0)
      return false;
    if (!MRI->hasOneUse(Lo20->getOperand(0).getReg()))
      return false;

    Lo12 = MRI->getVRegDef(Lo20->getOperand(1).getReg());
    if (!MRI->hasOneUse(Lo12->getOperand(0).getReg()))
      return false;
  }

  const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
  assert(Hi20.getOpcode() == LoongArch::PCALAU12I);
  if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_PCREL_LO ||
      !(isGlobalOrCPIOrBlockAddress(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
      Lo12Op2.getOffset() != 0)
    return false;

  if (Hi20Op1.isGlobal()) {
    LLVM_DEBUG(dbgs() << "  Found lowered global address: "
                      << *Hi20Op1.getGlobal() << "\n");
  } else if (Hi20Op1.isBlockAddress()) {
    LLVM_DEBUG(dbgs() << "  Found lowered block address: "
                      << *Hi20Op1.getBlockAddress() << "\n");
  } else if (Hi20Op1.isCPI()) {
    LLVM_DEBUG(dbgs() << "  Found lowered constant pool: " << Hi20Op1.getIndex()
                      << "\n");
  }

  return true;
}
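
// For example, folding an offset of 8 into the large pattern rewrites all
// four relocations and keeps the final ADD:
//
//   pcalau12i vreg1, %pc_hi20(s+8)
//   addi.d    vreg2, $zero, %pc_lo12(s+8)
//   lu32i.d   vreg3, vreg2, %pc64_lo20(s+8)
//   lu52i.d   vreg4, vreg3, %pc64_hi12(s+8)
//   add.d     vreg5, vreg4, vreg1
//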
// Update the offset in Hi20, Lo12, Lo20 and Hi12 instructions.
// Delete the tail instruction and update all the uses to use the
// output from Last.
void LoongArchMergeBaseOffsetOpt::foldOffset(
    MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
    MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
    int64_t Offset) {
  // Put the offset back into Hi20 and Lo12 (and Lo20/Hi12 for the large
  // pattern).
  Hi20.getOperand(1).setOffset(Offset);
  Lo12.getOperand(2).setOffset(Offset);
  if (Lo20 && Hi12) {
    Lo20->getOperand(2).setOffset(Offset);
    Hi12->getOperand(2).setOffset(Offset);
  }
  // Delete the tail instruction.
  MachineInstr *Def = Last ? Last : &Lo12;
  MRI->constrainRegClass(Def->getOperand(0).getReg(),
                         MRI->getRegClass(Tail.getOperand(0).getReg()));
  MRI->replaceRegWith(Tail.getOperand(0).getReg(),
                      Def->getOperand(0).getReg());
  Tail.eraseFromParent();
  LLVM_DEBUG(dbgs() << "  Merged offset " << Offset << " into base.\n"
                    << "     " << Hi20 << "     " << Lo12;);
  if (Lo20 && Hi12) {
    LLVM_DEBUG(dbgs() << "     " << *Lo20 << "     " << *Hi12;);
  }
}

// Detect patterns for large offsets that are passed into an ADD instruction.
// If the pattern is found, updates the offset in Hi20, Lo12, Lo20 and Hi12
// instructions and deletes TailAdd and the instructions that produced the
// offset.
//
// (The instructions marked with "!" are not necessarily present)
//
// Base address lowering is of the form:
//    Hi20: pcalau12i vreg1, %pc_hi20(s)
// +- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
// |  Lo20: lu32i.d vreg2, %pc64_lo20(s)           !
// +- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s)    !
// |
// | The large offset can be one of the forms:
// |
// +-> 1) Offset that has non-zero bits in Hi20 and Lo12 bits:
// |      OffsetHi20: lu12i.w vreg3, 4
// |      OffsetLo12: ori voff, vreg3, 188    ------------------+
// |                                                            |
// +-> 2) Offset that has non-zero bits in Hi20 bits only:      |
// |      OffsetHi20: lu12i.w voff, 128       ------------------+
// |                                                            |
// +-> 3) Offset that has non-zero bits in Lo20 bits:           |
// |      OffsetHi20: lu12i.w vreg3, 121      !                 |
// |      OffsetLo12: ori voff, vreg3, 122    !                 |
// |      OffsetLo20: lu32i.d voff, 123       ------------------+
// +-> 4) Offset that has non-zero bits in Hi12 bits:           |
//        OffsetHi20: lu12i.w vreg3, 121      !                 |
//        OffsetLo12: ori voff, vreg3, 122    !                 |
//        OffsetLo20: lu32i.d vreg3, 123      !                 |
//        OffsetHi12: lu52i.d voff, vreg3, 124 -----------------+
//                                                              |
//   TailAdd: add.d vreg4, vreg2, voff       <------------------+
//
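// For example, form 1 above materializes the offset (4 << 12) | 188 = 16572:
// the loop below walks the chain backwards from TailAdd's offset operand,
// adding 188 for the ORI and 16384 for the LU12I.W.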
bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
    MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
    MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
    Register GAReg) {
  assert((TailAdd.getOpcode() == LoongArch::ADD_W ||
          TailAdd.getOpcode() == LoongArch::ADD_D) &&
         "Expected ADD instruction!");
  Register Rs = TailAdd.getOperand(1).getReg();
  Register Rt = TailAdd.getOperand(2).getReg();
  Register Reg = Rs == GAReg ? Rt : Rs;
  SmallVector<MachineInstr *, 4> Instrs;
  int64_t Offset = 0;
  // Mask tracks which offset bits are still unclaimed. LU32I.D and LU52I.D
  // overwrite bits [51:32] and [63:52] respectively, so the sign-extended
  // contributions of earlier instructions into those ranges are masked out.
  int64_t Mask = -1;

  // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
  for (int i = 0; i < 4; i++) {
    // Stop once the chain reaches R0; all offset bits have been consumed.
    if (Reg == LoongArch::R0)
      break;

    // Can't fold if the register has more than one use.
    if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
      return false;

    MachineInstr *Curr = MRI->getVRegDef(Reg);
    if (!Curr)
      break;

    switch (Curr->getOpcode()) {
    default:
      // Can't fold if the instruction opcode is unexpected.
      return false;
    case LoongArch::ORI: {
      MachineOperand ImmOp = Curr->getOperand(2);
      if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
        return false;
      Offset += ImmOp.getImm();
      Reg = Curr->getOperand(1).getReg();
      Instrs.push_back(Curr);
      break;
    }
    case LoongArch::LU12I_W: {
      MachineOperand ImmOp = Curr->getOperand(1);
      if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
        return false;
      Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
      Reg = LoongArch::R0;
      Instrs.push_back(Curr);
      break;
    }
    case LoongArch::LU32I_D: {
      MachineOperand ImmOp = Curr->getOperand(2);
      if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
        return false;
      Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
      Mask ^= 0x000FFFFF00000000ULL;
      Reg = Curr->getOperand(1).getReg();
      Instrs.push_back(Curr);
      break;
    }
    case LoongArch::LU52I_D: {
      MachineOperand ImmOp = Curr->getOperand(2);
      if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
        return false;
      Offset += ImmOp.getImm() << 52;
      Mask ^= 0xFFF0000000000000ULL;
      Reg = Curr->getOperand(1).getReg();
      Instrs.push_back(Curr);
      break;
    }
    }
  }

  // Can't fold if no offset was extracted.
  if (!Offset)
    return false;

  foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
  LLVM_DEBUG(dbgs() << "  Offset Instrs:\n");
  for (auto *I : Instrs) {
    LLVM_DEBUG(dbgs() << "    " << *I);
    I->eraseFromParent();
  }

  return true;
}
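
// Look for a single use of the address defined by the lowering sequence and
// try to extract a constant offset from it: either the immediate of one or
// two trailing ADDI/ADDU16I.D instructions, or a large offset materialized
// into a register and consumed by an ADD (see foldLargeOffset).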
bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
                                                      MachineInstr &Lo12,
                                                      MachineInstr *&Lo20,
                                                      MachineInstr *&Hi12,
                                                      MachineInstr *&Last) {
  Register DestReg =
      Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();

  // Look for arithmetic instructions we can get an offset from.
  // We might be able to remove the arithmetic instructions by folding the
  // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I).
  if (!MRI->hasOneUse(DestReg))
    return false;

  // DestReg has only one use.
  MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
  switch (Tail.getOpcode()) {
  default:
    LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
                      << Tail);
    break;
  case LoongArch::ADDI_W:
    if (ST->is64Bit())
      return false;
    [[fallthrough]];
  case LoongArch::ADDI_D:
  case LoongArch::ADDU16I_D: {
    // Offset is simply an immediate operand.
    int64_t Offset = Tail.getOperand(2).getImm();
    if (Tail.getOpcode() == LoongArch::ADDU16I_D)
      Offset = SignExtend64<32>(Offset << 16);

    // We might have two ADDIs in a row.
    Register TailDestReg = Tail.getOperand(0).getReg();
    if (MRI->hasOneUse(TailDestReg)) {
      MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
      if (ST->is64Bit() && TailTail.getOpcode() == LoongArch::ADDI_W)
        return false;
      if (TailTail.getOpcode() == LoongArch::ADDI_W ||
          TailTail.getOpcode() == LoongArch::ADDI_D) {
        Offset += TailTail.getOperand(2).getImm();
        LLVM_DEBUG(dbgs() << "  Offset Instrs: " << Tail << TailTail);
        foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailTail, Offset);
        Tail.eraseFromParent();
        return true;
      }
    }

    LLVM_DEBUG(dbgs() << "  Offset Instr: " << Tail);
    foldOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, Offset);
    return true;
  }
  case LoongArch::ADD_W:
    if (ST->is64Bit())
      return false;
    [[fallthrough]];
  case LoongArch::ADD_D:
    // The offset is too large to fit in the immediate field of ADDI.
    return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
  }

  return false;
}

// Memory access opcode mapping for transforms.
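// For the small/medium pattern the opcode is unchanged; for the large
// pattern the final ADD supplies base and index registers, so simple-offset
// accesses are replaced by their indexed forms (e.g. LD_W -> LDX_W,
// FST_D -> FSTX_D). VLDREPL/XVLDREPL keep their opcode; foldIntoMemoryOps
// rejects them for the large pattern.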
static unsigned getNewOpc(unsigned Op, bool isLarge) {
  switch (Op) {
  case LoongArch::LD_B:
    return isLarge ? LoongArch::LDX_B : LoongArch::LD_B;
  case LoongArch::LD_H:
    return isLarge ? LoongArch::LDX_H : LoongArch::LD_H;
  case LoongArch::LD_W:
  case LoongArch::LDPTR_W:
    return isLarge ? LoongArch::LDX_W : LoongArch::LD_W;
  case LoongArch::LD_D:
  case LoongArch::LDPTR_D:
    return isLarge ? LoongArch::LDX_D : LoongArch::LD_D;
  case LoongArch::LD_BU:
    return isLarge ? LoongArch::LDX_BU : LoongArch::LD_BU;
  case LoongArch::LD_HU:
    return isLarge ? LoongArch::LDX_HU : LoongArch::LD_HU;
  case LoongArch::LD_WU:
    return isLarge ? LoongArch::LDX_WU : LoongArch::LD_WU;
  case LoongArch::FLD_S:
    return isLarge ? LoongArch::FLDX_S : LoongArch::FLD_S;
  case LoongArch::FLD_D:
    return isLarge ? LoongArch::FLDX_D : LoongArch::FLD_D;
  case LoongArch::VLD:
    return isLarge ? LoongArch::VLDX : LoongArch::VLD;
  case LoongArch::XVLD:
    return isLarge ? LoongArch::XVLDX : LoongArch::XVLD;
  case LoongArch::VLDREPL_B:
    return LoongArch::VLDREPL_B;
  case LoongArch::XVLDREPL_B:
    return LoongArch::XVLDREPL_B;
  case LoongArch::ST_B:
    return isLarge ? LoongArch::STX_B : LoongArch::ST_B;
  case LoongArch::ST_H:
    return isLarge ? LoongArch::STX_H : LoongArch::ST_H;
  case LoongArch::ST_W:
  case LoongArch::STPTR_W:
    return isLarge ? LoongArch::STX_W : LoongArch::ST_W;
  case LoongArch::ST_D:
  case LoongArch::STPTR_D:
    return isLarge ? LoongArch::STX_D : LoongArch::ST_D;
  case LoongArch::FST_S:
    return isLarge ? LoongArch::FSTX_S : LoongArch::FST_S;
  case LoongArch::FST_D:
    return isLarge ? LoongArch::FSTX_D : LoongArch::FST_D;
  case LoongArch::VST:
    return isLarge ? LoongArch::VSTX : LoongArch::VST;
  case LoongArch::XVST:
    return isLarge ? LoongArch::XVSTX : LoongArch::XVST;
  default:
    llvm_unreachable("Unexpected opcode for replacement");
  }
}

bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
                                                    MachineInstr &Lo12,
                                                    MachineInstr *&Lo20,
                                                    MachineInstr *&Hi12,
                                                    MachineInstr *&Last) {
  Register DestReg =
      Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();

  // If all the uses are memory ops with the same offset, we can transform:
  //
  // 1. (small/medium):
  //   pcalau12i vreg1, %pc_hi20(s)
  //   addi.d    vreg2, vreg1, %pc_lo12(s)
  //   ld.w      vreg3, vreg2, 8
  //
  // =>
  //
  //   pcalau12i vreg1, %pc_hi20(s+8)
  //   ld.w      vreg3, vreg1, %pc_lo12(s+8)
  //
  // 2. (large):
  //   pcalau12i vreg1, %pc_hi20(s)
  //   addi.d    vreg2, $zero, %pc_lo12(s)
  //   lu32i.d   vreg3, vreg2, %pc64_lo20(s)
  //   lu52i.d   vreg4, vreg3, %pc64_hi12(s)
  //   add.d     vreg5, vreg4, vreg1
  //   ld.w      vreg6, vreg5, 8
  //
  // =>
  //
  //   pcalau12i vreg1, %pc_hi20(s+8)
  //   addi.d    vreg2, $zero, %pc_lo12(s+8)
  //   lu32i.d   vreg3, vreg2, %pc64_lo20(s+8)
  //   lu52i.d   vreg4, vreg3, %pc64_hi12(s+8)
  //   ldx.w     vreg6, vreg4, vreg1

  std::optional<int64_t> CommonOffset;
  DenseMap<const MachineInstr *, SmallVector<unsigned>>
      InlineAsmMemoryOpIndexesMap;
  for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
    switch (UseMI.getOpcode()) {
    default:
      LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
      return false;
    case LoongArch::VLDREPL_B:
    case LoongArch::XVLDREPL_B:
      // We can't do this for the large pattern.
      if (Last)
        return false;
      [[fallthrough]];
    case LoongArch::LD_B:
    case LoongArch::LD_H:
    case LoongArch::LD_W:
    case LoongArch::LD_D:
    case LoongArch::LD_BU:
    case LoongArch::LD_HU:
    case LoongArch::LD_WU:
    case LoongArch::LDPTR_W:
    case LoongArch::LDPTR_D:
    case LoongArch::FLD_S:
    case LoongArch::FLD_D:
    case LoongArch::VLD:
    case LoongArch::XVLD:
    case LoongArch::ST_B:
    case LoongArch::ST_H:
    case LoongArch::ST_W:
    case LoongArch::ST_D:
    case LoongArch::STPTR_W:
    case LoongArch::STPTR_D:
    case LoongArch::FST_S:
    case LoongArch::FST_D:
    case LoongArch::VST:
    case LoongArch::XVST: {
      if (UseMI.getOperand(1).isFI())
        return false;
      // The register defined by Lo12 (or Last) must be used as the base
      // address, not as the value operand of a store.
      if (DestReg == UseMI.getOperand(0).getReg())
        return false;
      assert(DestReg == UseMI.getOperand(1).getReg() &&
             "Expected base address use");
      // All load/store instructions must use the same offset.
      int64_t Offset = UseMI.getOperand(2).getImm();
      if (CommonOffset && Offset != CommonOffset)
        return false;
      CommonOffset = Offset;
      break;
    }
    case LoongArch::INLINEASM:
    case LoongArch::INLINEASM_BR: {
      // We can't do this for the large pattern.
      if (Last)
        return false;
      SmallVector<unsigned> InlineAsmMemoryOpIndexes;
      unsigned NumOps = 0;
      for (unsigned I = InlineAsm::MIOp_FirstOperand;
           I < UseMI.getNumOperands(); I += 1 + NumOps) {
        const MachineOperand &FlagsMO = UseMI.getOperand(I);
        // Should be an imm.
        if (!FlagsMO.isImm())
          continue;

        const InlineAsm::Flag Flags(FlagsMO.getImm());
        NumOps = Flags.getNumOperandRegisters();

        // Memory constraints have two operands.
        if (NumOps != 2 || !Flags.isMemKind()) {
          // If the register is used by something other than a memory
          // constraint, we should not fold.
          for (unsigned J = 0; J < NumOps; ++J) {
            const MachineOperand &MO = UseMI.getOperand(I + 1 + J);
            if (MO.isReg() && MO.getReg() == DestReg)
              return false;
          }
          continue;
        }

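        // A mem-kind flag is followed by exactly two operands: the base
        // address register and the immediate offset inspected below.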
        // We can only do this for constraint m.
        if (Flags.getMemoryConstraintID() != InlineAsm::ConstraintCode::m)
          return false;

        const MachineOperand &AddrMO = UseMI.getOperand(I + 1);
        if (!AddrMO.isReg() || AddrMO.getReg() != DestReg)
          continue;

        const MachineOperand &OffsetMO = UseMI.getOperand(I + 2);
        if (!OffsetMO.isImm())
          continue;

        // All inline asm memory operands must use the same offset.
        int64_t Offset = OffsetMO.getImm();
        if (CommonOffset && Offset != CommonOffset)
          return false;
        CommonOffset = Offset;
        InlineAsmMemoryOpIndexes.push_back(I + 1);
      }
      InlineAsmMemoryOpIndexesMap.insert(
          std::make_pair(&UseMI, InlineAsmMemoryOpIndexes));
      break;
    }
    }
  }

  // We found a common offset.
  // Update the offsets in global address lowering.
  // We may have already folded some arithmetic, so we need to add to any
  // existing offset.
  int64_t NewOffset = Hi20.getOperand(1).getOffset() + *CommonOffset;
  // LA32 ignores the upper 32 bits.
  if (!ST->is64Bit())
    NewOffset = SignExtend64<32>(NewOffset);
  // We can only fold simm32 offsets.
  if (!isInt<32>(NewOffset))
    return false;

  // If this pass successfully optimizes the sequence, the MO_RELAX bitmask
  // target-flag must be removed from it.
  //
  // For example:
  //   pcalau12i $a0, %pc_hi20(symbol)
  //   addi.d    $a0, $a0, %pc_lo12(symbol)
  //   ld.w      $a0, $a0, 0
  //
  // =>
  //
  //   pcalau12i $a0, %pc_hi20(symbol)
  //   ld.w      $a0, $a0, %pc_lo12(symbol)
  //
  // The original sequence can be relaxed by the linker, but the optimized
  // sequence cannot, so its operands must not carry the MO_RELAX flag.
  Hi20.getOperand(1).setOffset(NewOffset);
  Hi20.getOperand(1).setTargetFlags(
      LoongArchII::getDirectFlags(Hi20.getOperand(1)));
  MachineOperand &ImmOp = Lo12.getOperand(2);
  ImmOp.setOffset(NewOffset);
  ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
  if (Lo20 && Hi12) {
    Lo20->getOperand(2).setOffset(NewOffset);
    Hi12->getOperand(2).setOffset(NewOffset);
  }

  // Update the immediate in the load/store instructions to add the offset.
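  // For the small/medium pattern the immediate operand becomes the %pc_lo12
  // relocation; for the large pattern the access is rewritten to its indexed
  // form, taking the base and index registers from the final ADD.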
  const LoongArchInstrInfo &TII = *ST->getInstrInfo();
  for (MachineInstr &UseMI :
       llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
    if (UseMI.getOpcode() == LoongArch::INLINEASM ||
        UseMI.getOpcode() == LoongArch::INLINEASM_BR) {
      auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI];
      for (unsigned I : InlineAsmMemoryOpIndexes) {
        MachineOperand &MO = UseMI.getOperand(I + 1);
        switch (ImmOp.getType()) {
        case MachineOperand::MO_GlobalAddress:
          MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
                        LoongArchII::getDirectFlags(ImmOp));
          break;
        case MachineOperand::MO_MCSymbol:
          MO.ChangeToMCSymbol(ImmOp.getMCSymbol(),
                              LoongArchII::getDirectFlags(ImmOp));
          MO.setOffset(ImmOp.getOffset());
          break;
        case MachineOperand::MO_BlockAddress:
          MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
                        LoongArchII::getDirectFlags(ImmOp));
          break;
        default:
          report_fatal_error("unsupported machine operand type");
          break;
        }
      }
    } else {
      UseMI.setDesc(TII.get(getNewOpc(UseMI.getOpcode(), Last)));
      if (Last) {
        UseMI.removeOperand(2);
        UseMI.removeOperand(1);
        UseMI.addOperand(Last->getOperand(1));
        UseMI.addOperand(Last->getOperand(2));
        UseMI.getOperand(1).setIsKill(false);
        UseMI.getOperand(2).setIsKill(false);
      } else {
        UseMI.removeOperand(2);
        UseMI.addOperand(ImmOp);
      }
    }
  }

  if (Last) {
    Last->eraseFromParent();
    return true;
  }

  MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
                      Hi20.getOperand(0).getReg());
  Lo12.eraseFromParent();
  return true;
}

bool LoongArchMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  ST = &Fn.getSubtarget<LoongArchSubtarget>();

  bool MadeChange = false;
  MRI = &Fn.getRegInfo();
  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &Hi20 : MBB) {
      MachineInstr *Lo12 = nullptr;
      MachineInstr *Lo20 = nullptr;
      MachineInstr *Hi12 = nullptr;
      MachineInstr *Last = nullptr;
      if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
        continue;
      MadeChange |= detectAndFoldOffset(Hi20, *Lo12, Lo20, Hi12, Last);
      MadeChange |= foldIntoMemoryOps(Hi20, *Lo12, Lo20, Hi12, Last);
    }
  }

  return MadeChange;
}

/// Returns an instance of the Merge Base Offset Optimization pass.
FunctionPass *llvm::createLoongArchMergeBaseOffsetOptPass() {
  return new LoongArchMergeBaseOffsetOpt();
}