1b2e69f52Shev //===---- LoongArchMergeBaseOffset.cpp - Optimise address calculations ----===//
2b2e69f52Shev //
3b2e69f52Shev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4b2e69f52Shev // See https://llvm.org/LICENSE.txt for license information.
5b2e69f52Shev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b2e69f52Shev //
7b2e69f52Shev //===----------------------------------------------------------------------===//
8b2e69f52Shev //
9b2e69f52Shev // Merge the offset of address calculation into the offset field
10b2e69f52Shev // of instructions in a global address lowering sequence.
11b2e69f52Shev //
12b2e69f52Shev //===----------------------------------------------------------------------===//
13b2e69f52Shev 
14b2e69f52Shev #include "LoongArch.h"
15b2e69f52Shev #include "LoongArchTargetMachine.h"
16b2e69f52Shev #include "llvm/CodeGen/MachineFunctionPass.h"
17b2e69f52Shev #include "llvm/CodeGen/Passes.h"
18b2e69f52Shev #include "llvm/MC/TargetRegistry.h"
19b2e69f52Shev #include "llvm/Support/Debug.h"
20b2e69f52Shev #include "llvm/Target/TargetOptions.h"
21b2e69f52Shev #include <optional>
22b2e69f52Shev 
23b2e69f52Shev using namespace llvm;
24b2e69f52Shev 
25b2e69f52Shev #define DEBUG_TYPE "loongarch-merge-base-offset"
26b2e69f52Shev #define LoongArch_MERGE_BASE_OFFSET_NAME "LoongArch Merge Base Offset"
27b2e69f52Shev 
28b2e69f52Shev namespace {
29b2e69f52Shev 
30b2e69f52Shev class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
31b2e69f52Shev   const LoongArchSubtarget *ST = nullptr;
32b2e69f52Shev   MachineRegisterInfo *MRI;
33b2e69f52Shev 
34b2e69f52Shev public:
35b2e69f52Shev   static char ID;
36b2e69f52Shev   bool runOnMachineFunction(MachineFunction &Fn) override;
37b2e69f52Shev   bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
38b2e69f52Shev                       MachineInstr *&Lo20, MachineInstr *&Hi12,
39b2e69f52Shev                       MachineInstr *&Last);
40b2e69f52Shev 
41b2e69f52Shev   bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
42b2e69f52Shev                            MachineInstr *&Lo20, MachineInstr *&Hi12,
43b2e69f52Shev                            MachineInstr *&Last);
44b2e69f52Shev   void foldOffset(MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
45b2e69f52Shev                   MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
46b2e69f52Shev                   int64_t Offset);
47b2e69f52Shev   bool foldLargeOffset(MachineInstr &Hi20, MachineInstr &Lo12,
48b2e69f52Shev                        MachineInstr *&Lo20, MachineInstr *&Hi12,
49b2e69f52Shev                        MachineInstr *&Last, MachineInstr &TailAdd,
50b2e69f52Shev                        Register GAReg);
51b2e69f52Shev 
52b2e69f52Shev   bool foldIntoMemoryOps(MachineInstr &Hi20, MachineInstr &Lo12,
53b2e69f52Shev                          MachineInstr *&Lo20, MachineInstr *&Hi12,
54b2e69f52Shev                          MachineInstr *&Last);
55b2e69f52Shev 
56b2e69f52Shev   LoongArchMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
57b2e69f52Shev 
58b2e69f52Shev   MachineFunctionProperties getRequiredProperties() const override {
59b2e69f52Shev     return MachineFunctionProperties().set(
60b2e69f52Shev         MachineFunctionProperties::Property::IsSSA);
61b2e69f52Shev   }
62b2e69f52Shev 
63b2e69f52Shev   void getAnalysisUsage(AnalysisUsage &AU) const override {
64b2e69f52Shev     AU.setPreservesCFG();
65b2e69f52Shev     MachineFunctionPass::getAnalysisUsage(AU);
66b2e69f52Shev   }
67b2e69f52Shev 
68b2e69f52Shev   StringRef getPassName() const override {
69b2e69f52Shev     return LoongArch_MERGE_BASE_OFFSET_NAME;
70b2e69f52Shev   }
71b2e69f52Shev };
72b2e69f52Shev } // end anonymous namespace
73b2e69f52Shev 
74b2e69f52Shev char LoongArchMergeBaseOffsetOpt::ID = 0;
75b2e69f52Shev INITIALIZE_PASS(LoongArchMergeBaseOffsetOpt, DEBUG_TYPE,
76b2e69f52Shev                 LoongArch_MERGE_BASE_OFFSET_NAME, false, false)
77b2e69f52Shev 
78b2e69f52Shev // Detect either of the patterns:
79b2e69f52Shev //
80b2e69f52Shev // 1. (small/medium):
81b2e69f52Shev //   pcalau12i vreg1, %pc_hi20(s)
82b2e69f52Shev //   addi.d vreg2, vreg1, %pc_lo12(s)
83b2e69f52Shev //
84b2e69f52Shev // 2. (large):
85b2e69f52Shev //   pcalau12i vreg1, %pc_hi20(s)
86b2e69f52Shev //   addi.d vreg2, $zero, %pc_lo12(s)
87b2e69f52Shev //   lu32i.d vreg3, vreg2, %pc64_lo20(s)
88b2e69f52Shev //   lu52i.d vreg4, vreg3, %pc64_hi12(s)
89b2e69f52Shev //   add.d vreg5, vreg4, vreg1
90b2e69f52Shev 
91b2e69f52Shev // The pattern is only accepted if:
92b2e69f52Shev //    1) For the small and medium patterns, the first instruction has only one
93b2e69f52Shev //       use, which is the ADDI.
94b2e69f52Shev //    2) For large pattern, the first four instructions each have only one use,
95b2e69f52Shev //       and the user of the fourth instruction is ADD.
96b2e69f52Shev //    3) The address operands have the appropriate type, reflecting the
97b2e69f52Shev //       lowering of a global address or constant pool using the pattern.
98b2e69f52Shev //    4) The offset value in the Global Address or Constant Pool is 0.
99b2e69f52Shev bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
100b2e69f52Shev                                                  MachineInstr *&Lo12,
101b2e69f52Shev                                                  MachineInstr *&Lo20,
102b2e69f52Shev                                                  MachineInstr *&Hi12,
103b2e69f52Shev                                                  MachineInstr *&Last) {
104b2e69f52Shev   if (Hi20.getOpcode() != LoongArch::PCALAU12I)
105b2e69f52Shev     return false;
106b2e69f52Shev 
107b2e69f52Shev   const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
108*0288d065SZhaoQi   if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_PCREL_HI)
109b2e69f52Shev     return false;
110b2e69f52Shev 
111b2e69f52Shev   auto isGlobalOrCPIOrBlockAddress = [](const MachineOperand &Op) {
112b2e69f52Shev     return Op.isGlobal() || Op.isCPI() || Op.isBlockAddress();
113b2e69f52Shev   };
114b2e69f52Shev 
115b2e69f52Shev   if (!isGlobalOrCPIOrBlockAddress(Hi20Op1) || Hi20Op1.getOffset() != 0)
116b2e69f52Shev     return false;
117b2e69f52Shev 
118b2e69f52Shev   Register HiDestReg = Hi20.getOperand(0).getReg();
119b2e69f52Shev   if (!MRI->hasOneUse(HiDestReg))
120b2e69f52Shev     return false;
121b2e69f52Shev 
122b2e69f52Shev   MachineInstr *UseInst = &*MRI->use_instr_begin(HiDestReg);
123b2e69f52Shev   if (UseInst->getOpcode() != LoongArch::ADD_D) {
124b2e69f52Shev     Lo12 = UseInst;
125b2e69f52Shev     if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
126b2e69f52Shev         (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
127b2e69f52Shev       return false;
128b2e69f52Shev   } else {
129b2e69f52Shev     assert(ST->is64Bit());
130b2e69f52Shev     Last = UseInst;
131b2e69f52Shev 
132b2e69f52Shev     Register LastOp1Reg = Last->getOperand(1).getReg();
133b2e69f52Shev     if (!LastOp1Reg.isVirtual())
134b2e69f52Shev       return false;
135b2e69f52Shev     Hi12 = MRI->getVRegDef(LastOp1Reg);
136b2e69f52Shev     const MachineOperand &Hi12Op2 = Hi12->getOperand(2);
137b2e69f52Shev     if (Hi12Op2.getTargetFlags() != LoongArchII::MO_PCREL64_HI)
138b2e69f52Shev       return false;
139b2e69f52Shev     if (!isGlobalOrCPIOrBlockAddress(Hi12Op2) || Hi12Op2.getOffset() != 0)
140b2e69f52Shev       return false;
141b2e69f52Shev     if (!MRI->hasOneUse(Hi12->getOperand(0).getReg()))
142b2e69f52Shev       return false;
143b2e69f52Shev 
144b2e69f52Shev     Lo20 = MRI->getVRegDef(Hi12->getOperand(1).getReg());
145b2e69f52Shev     const MachineOperand &Lo20Op2 = Lo20->getOperand(2);
146b2e69f52Shev     if (Lo20Op2.getTargetFlags() != LoongArchII::MO_PCREL64_LO)
147b2e69f52Shev       return false;
148b2e69f52Shev     if (!isGlobalOrCPIOrBlockAddress(Lo20Op2) || Lo20Op2.getOffset() != 0)
149b2e69f52Shev       return false;
150b2e69f52Shev     if (!MRI->hasOneUse(Lo20->getOperand(0).getReg()))
151b2e69f52Shev       return false;
152b2e69f52Shev 
153b2e69f52Shev     Lo12 = MRI->getVRegDef(Lo20->getOperand(1).getReg());
154b2e69f52Shev     if (!MRI->hasOneUse(Lo12->getOperand(0).getReg()))
155b2e69f52Shev       return false;
156b2e69f52Shev   }
157b2e69f52Shev 
158b2e69f52Shev   const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
159b2e69f52Shev   assert(Hi20.getOpcode() == LoongArch::PCALAU12I);
160*0288d065SZhaoQi   if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_PCREL_LO ||
161b2e69f52Shev       !(isGlobalOrCPIOrBlockAddress(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
162b2e69f52Shev       Lo12Op2.getOffset() != 0)
163b2e69f52Shev     return false;
164b2e69f52Shev 
165b2e69f52Shev   if (Hi20Op1.isGlobal()) {
166b2e69f52Shev     LLVM_DEBUG(dbgs() << " Found lowered global address: "
167b2e69f52Shev                       << *Hi20Op1.getGlobal() << "\n");
168b2e69f52Shev   } else if (Hi20Op1.isBlockAddress()) {
169b2e69f52Shev     LLVM_DEBUG(dbgs() << " Found lowered basic address: "
170b2e69f52Shev                       << *Hi20Op1.getBlockAddress() << "\n");
171b2e69f52Shev   } else if (Hi20Op1.isCPI()) {
172b2e69f52Shev     LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << Hi20Op1.getIndex()
173b2e69f52Shev                       << "\n");
174b2e69f52Shev   }
175b2e69f52Shev 
176b2e69f52Shev   return true;
177b2e69f52Shev }
178b2e69f52Shev 
179b2e69f52Shev // Update the offset in Hi20, Lo12, Lo20 and Hi12 instructions.
180b2e69f52Shev // Delete the tail instruction and update all the uses to use the
181b2e69f52Shev // output from Last.
182b2e69f52Shev void LoongArchMergeBaseOffsetOpt::foldOffset(
183b2e69f52Shev     MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
184b2e69f52Shev     MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
185b2e69f52Shev     int64_t Offset) {
186b2e69f52Shev   // Put the offset back into the Hi20 and Lo12 (and, for the large
186b2e69f52Shev   // pattern, Lo20/Hi12) symbol operands.
187b2e69f52Shev   Hi20.getOperand(1).setOffset(Offset);
188b2e69f52Shev   Lo12.getOperand(2).setOffset(Offset);
189b2e69f52Shev   if (Lo20 && Hi12) {
190b2e69f52Shev     Lo20->getOperand(2).setOffset(Offset);
191b2e69f52Shev     Hi12->getOperand(2).setOffset(Offset);
192b2e69f52Shev   }
193b2e69f52Shev   // Delete the tail instruction.
194b2e69f52Shev   MachineInstr *Def = Last ? Last : &Lo12;
195b2e69f52Shev   MRI->constrainRegClass(Def->getOperand(0).getReg(),
196b2e69f52Shev                          MRI->getRegClass(Tail.getOperand(0).getReg()));
197b2e69f52Shev   MRI->replaceRegWith(Tail.getOperand(0).getReg(), Def->getOperand(0).getReg());
198b2e69f52Shev   Tail.eraseFromParent();
199b2e69f52Shev   LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
200b2e69f52Shev                     << " " << Hi20 << " " << Lo12;);
201b2e69f52Shev   if (Lo20 && Hi12) {
202b2e69f52Shev     LLVM_DEBUG(dbgs() << " " << *Lo20 << " " << *Hi12;);
203b2e69f52Shev   }
204b2e69f52Shev }
205b2e69f52Shev 
206b2e69f52Shev // Detect patterns for large offsets that are passed into an ADD instruction.
207b2e69f52Shev // If the pattern is found, updates the offset in Hi20, Lo12, Lo20 and Hi12
208b2e69f52Shev // instructions and deletes TailAdd and the instructions that produced the
209b2e69f52Shev // offset.
210b2e69f52Shev //
211b225b15aShev // (The instructions marked with "!" are not necessarily present)
212b225b15aShev //
213b2e69f52Shev // Base address lowering is of the form:
214b2e69f52Shev //   Hi20: pcalau12i vreg1, %pc_hi20(s)
215b225b15aShev //   +- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
216b225b15aShev //   | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
217b225b15aShev //   +- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
218b225b15aShev //   |
219b225b15aShev //   | The large offset can be one of the forms:
220b225b15aShev //   |
221b225b15aShev //   +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
222b225b15aShev //   | OffsetHi20: lu12i.w vreg3, 4
223b225b15aShev //   | OffsetLo12: ori voff, vreg3, 188 ------------------+
224b225b15aShev //   | |
225b225b15aShev //   +-> 2) Offset that has non zero bits in Hi20 bits only: |
226b225b15aShev //   | OffsetHi20: lu12i.w voff, 128 ------------------+
227b225b15aShev //   | |
228b225b15aShev //   +-> 3) Offset that has non zero bits in Lo20 bits: |
229b225b15aShev //   | OffsetHi20: lu12i.w vreg3, 121 ! |
230b225b15aShev //   | OffsetLo12: ori voff, vreg3, 122 ! 
| 231b225b15aShev //   | OffsetLo20: lu32i.d voff, 123 ------------------+
232b225b15aShev //   +-> 4) Offset that has non zero bits in Hi12 bits: |
233b225b15aShev //   OffsetHi20: lu12i.w vreg3, 121 ! |
234b225b15aShev //   OffsetLo12: ori voff, vreg3, 122 ! |
235b225b15aShev //   OffsetLo20: lu32i.d vreg3, 123 ! |
236b225b15aShev //   OffsetHi12: lu52i.d voff, vrg3, 124 ------------------+
237b225b15aShev //   |
238b225b15aShev // TailAdd: add.d vreg4, vreg2, voff <------------------+
239b225b15aShev //
240b2e69f52Shev bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
241b2e69f52Shev     MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
242b2e69f52Shev     MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
243b2e69f52Shev     Register GAReg) {
244b2e69f52Shev   assert((TailAdd.getOpcode() == LoongArch::ADD_W ||
245b2e69f52Shev           TailAdd.getOpcode() == LoongArch::ADD_D) &&
246b2e69f52Shev          "Expected ADD instruction!");
247b2e69f52Shev   Register Rs = TailAdd.getOperand(1).getReg();
248b2e69f52Shev   Register Rt = TailAdd.getOperand(2).getReg();
249b2e69f52Shev   Register Reg = Rs == GAReg ? Rt : Rs;
250b225b15aShev   SmallVector<MachineInstr *, 4> Instrs;
251b225b15aShev   int64_t Offset = 0;
252b225b15aShev   int64_t Mask = -1;
253b225b15aShev 
254b225b15aShev   // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
255b225b15aShev   for (int i = 0; i < 4; i++) {
256b225b15aShev     // Handle the case where Reg is the zero register R0.
257b225b15aShev     if (Reg == LoongArch::R0)
258b225b15aShev       break;
259b2e69f52Shev 
260b2e69f52Shev     // Can't fold if the register has more than one use.
261b2e69f52Shev     if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
262b2e69f52Shev       return false;
263b2e69f52Shev 
264b225b15aShev     MachineInstr *Curr = MRI->getVRegDef(Reg);
265b225b15aShev     if (!Curr)
266b225b15aShev       break;
267b225b15aShev 
268b225b15aShev     switch (Curr->getOpcode()) {
269b225b15aShev     default:
270b225b15aShev       // Can't fold if the instruction opcode is unexpected.
271b225b15aShev       return false;
272b225b15aShev     case LoongArch::ORI: {
273b225b15aShev       MachineOperand ImmOp = Curr->getOperand(2);
274b225b15aShev       if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
275b225b15aShev         return false;
276b225b15aShev       Offset += ImmOp.getImm();
277b225b15aShev       Reg = Curr->getOperand(1).getReg();
278b225b15aShev       Instrs.push_back(Curr);
279b225b15aShev       break;
280b225b15aShev     }
281b225b15aShev     case LoongArch::LU12I_W: {
282b225b15aShev       MachineOperand ImmOp = Curr->getOperand(1);
283b225b15aShev       if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
284b225b15aShev         return false;
285b225b15aShev       Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
286b225b15aShev       Reg = LoongArch::R0;
287b225b15aShev       Instrs.push_back(Curr);
288b225b15aShev       break;
289b225b15aShev     }
290b225b15aShev     case LoongArch::LU32I_D: {
291b225b15aShev       MachineOperand ImmOp = Curr->getOperand(2);
292b225b15aShev       if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
293b225b15aShev         return false;
294b225b15aShev       Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
295b225b15aShev       Mask ^= 0x000FFFFF00000000ULL;
296b225b15aShev       Reg = Curr->getOperand(1).getReg();
297b225b15aShev       Instrs.push_back(Curr);
298b225b15aShev       break;
299b225b15aShev     }
300b225b15aShev     case LoongArch::LU52I_D: {
301b225b15aShev       MachineOperand ImmOp = Curr->getOperand(2);
302b225b15aShev       if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
303b225b15aShev         return false;
304b225b15aShev       Offset += ImmOp.getImm() << 52;
305b225b15aShev       Mask ^= 0xFFF0000000000000ULL;
306b225b15aShev       Reg = Curr->getOperand(1).getReg();
307b225b15aShev       Instrs.push_back(Curr);
308b225b15aShev       break;
309b225b15aShev     }
310b225b15aShev     }
311b2e69f52Shev   }
312b2e69f52Shev 
313b225b15aShev   // Can't fold if no offset could be extracted.
314b225b15aShev   if (!Offset)
315b2e69f52Shev     return false;
316b225b15aShev 
317b2e69f52Shev   foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
318b225b15aShev   LLVM_DEBUG(dbgs() << " Offset Instrs:\n");
319b225b15aShev   for (auto I : Instrs) {
320b225b15aShev     LLVM_DEBUG(dbgs() << " " << *I);
321b225b15aShev     I->eraseFromParent();
322b2e69f52Shev   }
323b225b15aShev 
324b225b15aShev   return true;
325b2e69f52Shev }
326b2e69f52Shev 
327b2e69f52Shev bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
328b2e69f52Shev                                                       MachineInstr &Lo12,
329b2e69f52Shev                                                       MachineInstr *&Lo20,
330b2e69f52Shev                                                       MachineInstr *&Hi12,
331b2e69f52Shev                                                       MachineInstr *&Last) {
332b2e69f52Shev   Register DestReg =
333b2e69f52Shev       Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
334b2e69f52Shev 
335b2e69f52Shev   // Look for arithmetic instructions we can get an offset from.
336b2e69f52Shev   // We might be able to remove the arithmetic instructions by folding the
337b2e69f52Shev   // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I).
338b2e69f52Shev   if (!MRI->hasOneUse(DestReg))
339b2e69f52Shev     return false;
340b2e69f52Shev 
341b2e69f52Shev   // DestReg has only one use.
342b2e69f52Shev   MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
343b2e69f52Shev   switch (Tail.getOpcode()) {
344b2e69f52Shev   default:
345b2e69f52Shev     LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
346b2e69f52Shev                       << Tail);
347b2e69f52Shev     break;
348b2e69f52Shev   case LoongArch::ADDI_W:
349b2e69f52Shev     if (ST->is64Bit())
350b2e69f52Shev       return false;
351b2e69f52Shev     [[fallthrough]];
352b2e69f52Shev   case LoongArch::ADDI_D:
353b2e69f52Shev   case LoongArch::ADDU16I_D: {
354b2e69f52Shev     // Offset is simply an immediate operand.
355b2e69f52Shev     int64_t Offset = Tail.getOperand(2).getImm();
356b2e69f52Shev     if (Tail.getOpcode() == LoongArch::ADDU16I_D)
357b2e69f52Shev       Offset = SignExtend64<32>(Offset << 16);
358b2e69f52Shev 
359b2e69f52Shev     // We might have two ADDIs in a row. 
360b2e69f52Shev     Register TailDestReg = Tail.getOperand(0).getReg();
361b2e69f52Shev     if (MRI->hasOneUse(TailDestReg)) {
362b2e69f52Shev       MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
363b2e69f52Shev       if (ST->is64Bit() && TailTail.getOpcode() == LoongArch::ADDI_W)
364b2e69f52Shev         return false;
365b2e69f52Shev       if (TailTail.getOpcode() == LoongArch::ADDI_W ||
366b2e69f52Shev           TailTail.getOpcode() == LoongArch::ADDI_D) {
367b2e69f52Shev         Offset += TailTail.getOperand(2).getImm();
368b2e69f52Shev         LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
369b2e69f52Shev         foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailTail, Offset);
370b2e69f52Shev         Tail.eraseFromParent();
371b2e69f52Shev         return true;
372b2e69f52Shev       }
373b2e69f52Shev     }
374b2e69f52Shev 
375b2e69f52Shev     LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
376b2e69f52Shev     foldOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, Offset);
377b2e69f52Shev     return true;
378b2e69f52Shev   }
379b2e69f52Shev   case LoongArch::ADD_W:
380b2e69f52Shev     if (ST->is64Bit())
381b2e69f52Shev       return false;
382b2e69f52Shev     [[fallthrough]];
383b2e69f52Shev   case LoongArch::ADD_D:
384b2e69f52Shev     // The offset is too large to fit in the immediate field of ADDI.
385b2e69f52Shev     return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
386b2e69f52Shev     break;
387b2e69f52Shev   }
388b2e69f52Shev 
389b2e69f52Shev   return false;
390b2e69f52Shev }
391b2e69f52Shev 
392b2e69f52Shev // Map a memory access opcode to its indexed (large pattern) or immediate form.
393b2e69f52Shev static unsigned getNewOpc(unsigned Op, bool isLarge) {
394b2e69f52Shev   switch (Op) {
395b2e69f52Shev   case LoongArch::LD_B:
396b2e69f52Shev     return isLarge ? LoongArch::LDX_B : LoongArch::LD_B;
397b2e69f52Shev   case LoongArch::LD_H:
398b2e69f52Shev     return isLarge ? LoongArch::LDX_H : LoongArch::LD_H;
399b2e69f52Shev   case LoongArch::LD_W:
400b2e69f52Shev   case LoongArch::LDPTR_W:
401b2e69f52Shev     return isLarge ? LoongArch::LDX_W : LoongArch::LD_W;
402b2e69f52Shev   case LoongArch::LD_D:
403b2e69f52Shev   case LoongArch::LDPTR_D:
404b2e69f52Shev     return isLarge ? LoongArch::LDX_D : LoongArch::LD_D;
405b2e69f52Shev   case LoongArch::LD_BU:
406b2e69f52Shev     return isLarge ? LoongArch::LDX_BU : LoongArch::LD_BU;
407b2e69f52Shev   case LoongArch::LD_HU:
408b2e69f52Shev     return isLarge ? LoongArch::LDX_HU : LoongArch::LD_HU;
409b2e69f52Shev   case LoongArch::LD_WU:
410b2e69f52Shev     return isLarge ? LoongArch::LDX_WU : LoongArch::LD_WU;
411b2e69f52Shev   case LoongArch::FLD_S:
412b2e69f52Shev     return isLarge ? LoongArch::FLDX_S : LoongArch::FLD_S;
413b2e69f52Shev   case LoongArch::FLD_D:
414b2e69f52Shev     return isLarge ? LoongArch::FLDX_D : LoongArch::FLD_D;
415985d64b0Shev   case LoongArch::VLD:
416985d64b0Shev     return isLarge ? LoongArch::VLDX : LoongArch::VLD;
417985d64b0Shev   case LoongArch::XVLD:
418985d64b0Shev     return isLarge ? LoongArch::XVLDX : LoongArch::XVLD;
419985d64b0Shev   case LoongArch::VLDREPL_B:
420985d64b0Shev     return LoongArch::VLDREPL_B;
421985d64b0Shev   case LoongArch::XVLDREPL_B:
422985d64b0Shev     return LoongArch::XVLDREPL_B;
423b2e69f52Shev   case LoongArch::ST_B:
424b2e69f52Shev     return isLarge ? LoongArch::STX_B : LoongArch::ST_B;
425b2e69f52Shev   case LoongArch::ST_H:
426b2e69f52Shev     return isLarge ? LoongArch::STX_H : LoongArch::ST_H;
427b2e69f52Shev   case LoongArch::ST_W:
428b2e69f52Shev   case LoongArch::STPTR_W:
429b2e69f52Shev     return isLarge ? LoongArch::STX_W : LoongArch::ST_W;
430b2e69f52Shev   case LoongArch::ST_D:
431b2e69f52Shev   case LoongArch::STPTR_D:
432b2e69f52Shev     return isLarge ? LoongArch::STX_D : LoongArch::ST_D;
433b2e69f52Shev   case LoongArch::FST_S:
434b2e69f52Shev     return isLarge ? LoongArch::FSTX_S : LoongArch::FST_S;
435b2e69f52Shev   case LoongArch::FST_D:
436b2e69f52Shev     return isLarge ? LoongArch::FSTX_D : LoongArch::FST_D;
437985d64b0Shev   case LoongArch::VST:
438985d64b0Shev     return isLarge ? LoongArch::VSTX : LoongArch::VST;
439985d64b0Shev   case LoongArch::XVST:
440985d64b0Shev     return isLarge ? LoongArch::XVSTX : LoongArch::XVST;
441b2e69f52Shev   default:
442b2e69f52Shev     llvm_unreachable("Unexpected opcode for replacement");
443b2e69f52Shev   }
444b2e69f52Shev }
445b2e69f52Shev 
446b2e69f52Shev bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
447b2e69f52Shev                                                     MachineInstr &Lo12,
448b2e69f52Shev                                                     MachineInstr *&Lo20,
449b2e69f52Shev                                                     MachineInstr *&Hi12,
450b2e69f52Shev                                                     MachineInstr *&Last) {
451b2e69f52Shev   Register DestReg =
452b2e69f52Shev       Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
453b2e69f52Shev 
454b2e69f52Shev   // If all the uses are memory ops with the same offset, we can transform:
455b2e69f52Shev   //
456b2e69f52Shev   // 1. (small/medium):
457b2e69f52Shev   //   pcalau12i vreg1, %pc_hi20(s)
458b2e69f52Shev   //   addi.d vreg2, vreg1, %pc_lo12(s)
459b2e69f52Shev   //   ld.w vreg3, 8(vreg2)
460b2e69f52Shev   //
461b2e69f52Shev   // =>
462b2e69f52Shev   //
463b2e69f52Shev   //   pcalau12i vreg1, %pc_hi20(s+8)
464b2e69f52Shev   //   ld.w vreg3, vreg1, %pc_lo12(s+8)(vreg1)
465b2e69f52Shev   //
466b2e69f52Shev   // 2. 
(large): 467b2e69f52Shev   //   pcalau12i vreg1, %pc_hi20(s)
468b2e69f52Shev   //   addi.d vreg2, $zero, %pc_lo12(s)
469b2e69f52Shev   //   lu32i.d vreg3, vreg2, %pc64_lo20(s)
470b2e69f52Shev   //   lu52i.d vreg4, vreg3, %pc64_hi12(s)
471b2e69f52Shev   //   add.d vreg5, vreg4, vreg1
472b2e69f52Shev   //   ld.w vreg6, 8(vreg5)
473b2e69f52Shev   //
474b2e69f52Shev   // =>
475b2e69f52Shev   //
476b2e69f52Shev   //   pcalau12i vreg1, %pc_hi20(s+8)
477b2e69f52Shev   //   addi.d vreg2, $zero, %pc_lo12(s+8)
478b2e69f52Shev   //   lu32i.d vreg3, vreg2, %pc64_lo20(s+8)
479b2e69f52Shev   //   lu52i.d vreg4, vreg3, %pc64_hi12(s+8)
480b2e69f52Shev   //   ldx.w vreg6, vreg4, vreg1
481b2e69f52Shev 
482b2e69f52Shev   std::optional<int64_t> CommonOffset;
483b2e69f52Shev   DenseMap<const MachineInstr *, SmallVector<unsigned>>
484b2e69f52Shev       InlineAsmMemoryOpIndexesMap;
485b2e69f52Shev   for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
486b2e69f52Shev     switch (UseMI.getOpcode()) {
487b2e69f52Shev     default:
488b2e69f52Shev       LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
489b2e69f52Shev       return false;
490985d64b0Shev     case LoongArch::VLDREPL_B:
491985d64b0Shev     case LoongArch::XVLDREPL_B:
492985d64b0Shev       // We can't do this for large pattern.
493985d64b0Shev       if (Last)
494985d64b0Shev         return false;
495985d64b0Shev       [[fallthrough]];
496b2e69f52Shev     case LoongArch::LD_B:
497b2e69f52Shev     case LoongArch::LD_H:
498b2e69f52Shev     case LoongArch::LD_W:
499b2e69f52Shev     case LoongArch::LD_D:
500b2e69f52Shev     case LoongArch::LD_BU:
501b2e69f52Shev     case LoongArch::LD_HU:
502b2e69f52Shev     case LoongArch::LD_WU:
503b2e69f52Shev     case LoongArch::LDPTR_W:
504b2e69f52Shev     case LoongArch::LDPTR_D:
505b2e69f52Shev     case LoongArch::FLD_S:
506b2e69f52Shev     case LoongArch::FLD_D:
507985d64b0Shev     case LoongArch::VLD:
508985d64b0Shev     case LoongArch::XVLD:
509b2e69f52Shev     case LoongArch::ST_B:
510b2e69f52Shev     case LoongArch::ST_H:
511b2e69f52Shev     case LoongArch::ST_W:
512b2e69f52Shev     case LoongArch::ST_D:
513b2e69f52Shev     case LoongArch::STPTR_W:
514b2e69f52Shev     case LoongArch::STPTR_D:
515b2e69f52Shev     case LoongArch::FST_S:
516985d64b0Shev     case LoongArch::FST_D:
517985d64b0Shev     case LoongArch::VST:
518985d64b0Shev     case LoongArch::XVST: {
519b2e69f52Shev       if (UseMI.getOperand(1).isFI())
520b2e69f52Shev         return false;
521b2e69f52Shev       // Register defined by Lo should not be the value register.
522b2e69f52Shev       if (DestReg == UseMI.getOperand(0).getReg())
523b2e69f52Shev         return false;
524b2e69f52Shev       assert(DestReg == UseMI.getOperand(1).getReg() &&
525b2e69f52Shev              "Expected base address use");
526b2e69f52Shev       // All load/store instructions must use the same offset.
527b2e69f52Shev       int64_t Offset = UseMI.getOperand(2).getImm();
528b2e69f52Shev       if (CommonOffset && Offset != CommonOffset)
529b2e69f52Shev         return false;
530b2e69f52Shev       CommonOffset = Offset;
531b2e69f52Shev       break;
532b2e69f52Shev     }
533b2e69f52Shev     case LoongArch::INLINEASM:
534b2e69f52Shev     case LoongArch::INLINEASM_BR: {
535b2e69f52Shev       // We can't do this for large pattern.
536b2e69f52Shev       if (Last)
537b2e69f52Shev         return false;
538b2e69f52Shev       SmallVector<unsigned> InlineAsmMemoryOpIndexes;
539b2e69f52Shev       unsigned NumOps = 0;
540b2e69f52Shev       for (unsigned I = InlineAsm::MIOp_FirstOperand;
541b2e69f52Shev            I < UseMI.getNumOperands(); I += 1 + NumOps) {
542b2e69f52Shev         const MachineOperand &FlagsMO = UseMI.getOperand(I);
543b2e69f52Shev         // Should be an imm.
544b2e69f52Shev         if (!FlagsMO.isImm())
545b2e69f52Shev           continue;
546b2e69f52Shev 
547b2e69f52Shev         const InlineAsm::Flag Flags(FlagsMO.getImm());
548b2e69f52Shev         NumOps = Flags.getNumOperandRegisters();
549b2e69f52Shev 
550b2e69f52Shev         // Memory constraints have two operands.
551b2e69f52Shev         if (NumOps != 2 || !Flags.isMemKind()) {
552b2e69f52Shev           // If the register is used by something other than a memory constraint,
553b2e69f52Shev           // we should not fold.
554b2e69f52Shev           for (unsigned J = 0; J < NumOps; ++J) {
555b2e69f52Shev             const MachineOperand &MO = UseMI.getOperand(I + 1 + J);
556b2e69f52Shev             if (MO.isReg() && MO.getReg() == DestReg)
557b2e69f52Shev               return false;
558b2e69f52Shev           }
559b2e69f52Shev           continue;
560b2e69f52Shev         }
561b2e69f52Shev 
562b2e69f52Shev         // We can only do this for constraint m.
563b2e69f52Shev         if (Flags.getMemoryConstraintID() != InlineAsm::ConstraintCode::m)
564b2e69f52Shev           return false;
565b2e69f52Shev 
566b2e69f52Shev         const MachineOperand &AddrMO = UseMI.getOperand(I + 1);
567b2e69f52Shev         if (!AddrMO.isReg() || AddrMO.getReg() != DestReg)
568b2e69f52Shev           continue;
569b2e69f52Shev 
570b2e69f52Shev         const MachineOperand &OffsetMO = UseMI.getOperand(I + 2);
571b2e69f52Shev         if (!OffsetMO.isImm())
572b2e69f52Shev           continue;
573b2e69f52Shev 
574b2e69f52Shev         // All inline asm memory operands must use the same offset.
575b2e69f52Shev         int64_t Offset = OffsetMO.getImm();
576b2e69f52Shev         if (CommonOffset && Offset != CommonOffset)
577b2e69f52Shev           return false;
578b2e69f52Shev         CommonOffset = Offset;
579b2e69f52Shev         InlineAsmMemoryOpIndexes.push_back(I + 1);
580b2e69f52Shev       }
581b2e69f52Shev       InlineAsmMemoryOpIndexesMap.insert(
582b2e69f52Shev           std::make_pair(&UseMI, InlineAsmMemoryOpIndexes));
583b2e69f52Shev       break;
584b2e69f52Shev     }
585b2e69f52Shev     }
586b2e69f52Shev   }
587b2e69f52Shev 
588b2e69f52Shev   // We found a common offset.
589b2e69f52Shev   // Update the offsets in global address lowering.
590b2e69f52Shev   // We may have already folded some arithmetic so we need to add to any
591b2e69f52Shev   // existing offset.
592b2e69f52Shev   int64_t NewOffset = Hi20.getOperand(1).getOffset() + *CommonOffset;
593b2e69f52Shev   // LA32 ignores the upper 32 bits.
594b2e69f52Shev   if (!ST->is64Bit())
595b2e69f52Shev     NewOffset = SignExtend64<32>(NewOffset);
596b2e69f52Shev   // We can only fold simm32 offsets.
597b2e69f52Shev   if (!isInt<32>(NewOffset))
598b2e69f52Shev     return false;
599b2e69f52Shev 
600*0288d065SZhaoQi   // If optimized by this pass successfully, MO_RELAX bitmask target-flag should
601*0288d065SZhaoQi   // be removed from the code sequence.
602*0288d065SZhaoQi   //
603*0288d065SZhaoQi   // For example:
604*0288d065SZhaoQi   //   pcalau12i $a0, %pc_hi20(symbol)
605*0288d065SZhaoQi   //   addi.d $a0, $a0, %pc_lo12(symbol)
606*0288d065SZhaoQi   //   ld.w $a0, $a0, 0
607*0288d065SZhaoQi   //
608*0288d065SZhaoQi   // =>
609*0288d065SZhaoQi   //
610*0288d065SZhaoQi   //   pcalau12i $a0, %pc_hi20(symbol)
611*0288d065SZhaoQi   //   ld.w $a0, $a0, %pc_lo12(symbol)
612*0288d065SZhaoQi   //
613*0288d065SZhaoQi   // Code sequence optimized before can be relaxed by the linker. But after being
614*0288d065SZhaoQi   // optimized, it cannot be relaxed any more. So MO_RELAX flag should not be
615*0288d065SZhaoQi   // carried by them. 
616b2e69f52Shev   Hi20.getOperand(1).setOffset(NewOffset);
617*0288d065SZhaoQi   Hi20.getOperand(1).setTargetFlags(
618*0288d065SZhaoQi       LoongArchII::getDirectFlags(Hi20.getOperand(1)));
619b2e69f52Shev   MachineOperand &ImmOp = Lo12.getOperand(2);
620b2e69f52Shev   ImmOp.setOffset(NewOffset);
621*0288d065SZhaoQi   ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
622b2e69f52Shev   if (Lo20 && Hi12) {
623b2e69f52Shev     Lo20->getOperand(2).setOffset(NewOffset);
624b2e69f52Shev     Hi12->getOperand(2).setOffset(NewOffset);
625b2e69f52Shev   }
626b2e69f52Shev 
627b2e69f52Shev   // Update the immediate in each load/store (or inline-asm memory operand) use.
628b2e69f52Shev   const LoongArchInstrInfo &TII = *ST->getInstrInfo();
629b2e69f52Shev   for (MachineInstr &UseMI :
630b2e69f52Shev        llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
631b2e69f52Shev     if (UseMI.getOpcode() == LoongArch::INLINEASM ||
632b2e69f52Shev         UseMI.getOpcode() == LoongArch::INLINEASM_BR) {
633b2e69f52Shev       auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI];
634b2e69f52Shev       for (unsigned I : InlineAsmMemoryOpIndexes) {
635b2e69f52Shev         MachineOperand &MO = UseMI.getOperand(I + 1);
636b2e69f52Shev         switch (ImmOp.getType()) {
637b2e69f52Shev         case MachineOperand::MO_GlobalAddress:
638b2e69f52Shev           MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
639*0288d065SZhaoQi                         LoongArchII::getDirectFlags(ImmOp));
640b2e69f52Shev           break;
641b2e69f52Shev         case MachineOperand::MO_MCSymbol:
642*0288d065SZhaoQi           MO.ChangeToMCSymbol(ImmOp.getMCSymbol(),
643*0288d065SZhaoQi                               LoongArchII::getDirectFlags(ImmOp));
644b2e69f52Shev           MO.setOffset(ImmOp.getOffset());
645b2e69f52Shev           break;
646b2e69f52Shev         case MachineOperand::MO_BlockAddress:
647b2e69f52Shev           MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
648*0288d065SZhaoQi                         LoongArchII::getDirectFlags(ImmOp));
649b2e69f52Shev           break;
650b2e69f52Shev         default:
651b2e69f52Shev           report_fatal_error("unsupported machine operand type");
652b2e69f52Shev           break;
653b2e69f52Shev         }
654b2e69f52Shev       }
655b2e69f52Shev     } else {
656b2e69f52Shev       UseMI.setDesc(TII.get(getNewOpc(UseMI.getOpcode(), Last)));
657b2e69f52Shev       if (Last) {
658b2e69f52Shev         UseMI.removeOperand(2);
659b2e69f52Shev         UseMI.removeOperand(1);
660b2e69f52Shev         UseMI.addOperand(Last->getOperand(1));
661b2e69f52Shev         UseMI.addOperand(Last->getOperand(2));
662b2e69f52Shev         UseMI.getOperand(1).setIsKill(false);
663b2e69f52Shev         UseMI.getOperand(2).setIsKill(false);
664b2e69f52Shev       } else {
665b2e69f52Shev         UseMI.removeOperand(2);
666b2e69f52Shev         UseMI.addOperand(ImmOp);
667b2e69f52Shev       }
668b2e69f52Shev     }
669b2e69f52Shev   }
670b2e69f52Shev 
671b2e69f52Shev   if (Last) {
672b2e69f52Shev     Last->eraseFromParent();
673b2e69f52Shev     return true;
674b2e69f52Shev   }
675b2e69f52Shev 
676b2e69f52Shev   MRI->replaceRegWith(Lo12.getOperand(0).getReg(), Hi20.getOperand(0).getReg());
677b2e69f52Shev   Lo12.eraseFromParent();
678b2e69f52Shev   return true;
679b2e69f52Shev }
680b2e69f52Shev 
681b2e69f52Shev bool LoongArchMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
682b2e69f52Shev   if (skipFunction(Fn.getFunction()))
683b2e69f52Shev     return false;
684b2e69f52Shev 
685b2e69f52Shev   ST = &Fn.getSubtarget<LoongArchSubtarget>();
686b2e69f52Shev 
687b2e69f52Shev   bool MadeChange = false;
688b2e69f52Shev   MRI = &Fn.getRegInfo();
689b2e69f52Shev   for (MachineBasicBlock &MBB : Fn) {
690b2e69f52Shev     LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
691b2e69f52Shev     for (MachineInstr &Hi20 : MBB) {
692b2e69f52Shev       MachineInstr *Lo12 = nullptr;
693b2e69f52Shev       MachineInstr *Lo20 = nullptr;
694b2e69f52Shev       MachineInstr *Hi12 = nullptr;
695b2e69f52Shev       MachineInstr *Last = nullptr;
696b2e69f52Shev       if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
697b2e69f52Shev         continue;
698b2e69f52Shev       MadeChange |= detectAndFoldOffset(Hi20, *Lo12, Lo20, Hi12, Last);
699b2e69f52Shev       MadeChange |= foldIntoMemoryOps(Hi20, *Lo12, Lo20, Hi12, Last);
700b2e69f52Shev     }
701b2e69f52Shev   }
702b2e69f52Shev 
703b2e69f52Shev   return MadeChange;
704b2e69f52Shev }
705b2e69f52Shev 
706b2e69f52Shev /// Returns a new instance of the LoongArch Merge Base Offset optimization pass.
707b2e69f52Shev FunctionPass *llvm::createLoongArchMergeBaseOffsetOptPass() {
708b2e69f52Shev   return new LoongArchMergeBaseOffsetOpt();
709b2e69f52Shev }
710