xref: /llvm-project/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp (revision 0288d065eecb1208971dc4cdcc71731e34c6fca0)
1b2e69f52Shev //===---- LoongArchMergeBaseOffset.cpp - Optimise address calculations ----===//
2b2e69f52Shev //
3b2e69f52Shev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4b2e69f52Shev // See https://llvm.org/LICENSE.txt for license information.
5b2e69f52Shev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b2e69f52Shev //
7b2e69f52Shev //===----------------------------------------------------------------------===//
8b2e69f52Shev //
9b2e69f52Shev // Merge the offset of address calculation into the offset field
10b2e69f52Shev // of instructions in a global address lowering sequence.
11b2e69f52Shev //
12b2e69f52Shev //===----------------------------------------------------------------------===//
13b2e69f52Shev 
14b2e69f52Shev #include "LoongArch.h"
15b2e69f52Shev #include "LoongArchTargetMachine.h"
16b2e69f52Shev #include "llvm/CodeGen/MachineFunctionPass.h"
17b2e69f52Shev #include "llvm/CodeGen/Passes.h"
18b2e69f52Shev #include "llvm/MC/TargetRegistry.h"
19b2e69f52Shev #include "llvm/Support/Debug.h"
20b2e69f52Shev #include "llvm/Target/TargetOptions.h"
21b2e69f52Shev #include <optional>
22b2e69f52Shev 
23b2e69f52Shev using namespace llvm;
24b2e69f52Shev 
25b2e69f52Shev #define DEBUG_TYPE "loongarch-merge-base-offset"
26b2e69f52Shev #define LoongArch_MERGE_BASE_OFFSET_NAME "LoongArch Merge Base Offset"
27b2e69f52Shev 
28b2e69f52Shev namespace {
29b2e69f52Shev 
30b2e69f52Shev class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
31b2e69f52Shev   const LoongArchSubtarget *ST = nullptr;
32b2e69f52Shev   MachineRegisterInfo *MRI;
33b2e69f52Shev 
34b2e69f52Shev public:
35b2e69f52Shev   static char ID;
36b2e69f52Shev   bool runOnMachineFunction(MachineFunction &Fn) override;
37b2e69f52Shev   bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
38b2e69f52Shev                       MachineInstr *&Lo20, MachineInstr *&Hi12,
39b2e69f52Shev                       MachineInstr *&Last);
40b2e69f52Shev 
41b2e69f52Shev   bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
42b2e69f52Shev                            MachineInstr *&Lo20, MachineInstr *&Hi12,
43b2e69f52Shev                            MachineInstr *&Last);
44b2e69f52Shev   void foldOffset(MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
45b2e69f52Shev                   MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
46b2e69f52Shev                   int64_t Offset);
47b2e69f52Shev   bool foldLargeOffset(MachineInstr &Hi20, MachineInstr &Lo12,
48b2e69f52Shev                        MachineInstr *&Lo20, MachineInstr *&Hi12,
49b2e69f52Shev                        MachineInstr *&Last, MachineInstr &TailAdd,
50b2e69f52Shev                        Register GAReg);
51b2e69f52Shev 
52b2e69f52Shev   bool foldIntoMemoryOps(MachineInstr &Hi20, MachineInstr &Lo12,
53b2e69f52Shev                          MachineInstr *&Lo20, MachineInstr *&Hi12,
54b2e69f52Shev                          MachineInstr *&Last);
55b2e69f52Shev 
56b2e69f52Shev   LoongArchMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
57b2e69f52Shev 
58b2e69f52Shev   MachineFunctionProperties getRequiredProperties() const override {
59b2e69f52Shev     return MachineFunctionProperties().set(
60b2e69f52Shev         MachineFunctionProperties::Property::IsSSA);
61b2e69f52Shev   }
62b2e69f52Shev 
63b2e69f52Shev   void getAnalysisUsage(AnalysisUsage &AU) const override {
64b2e69f52Shev     AU.setPreservesCFG();
65b2e69f52Shev     MachineFunctionPass::getAnalysisUsage(AU);
66b2e69f52Shev   }
67b2e69f52Shev 
68b2e69f52Shev   StringRef getPassName() const override {
69b2e69f52Shev     return LoongArch_MERGE_BASE_OFFSET_NAME;
70b2e69f52Shev   }
71b2e69f52Shev };
72b2e69f52Shev } // end anonymous namespace
73b2e69f52Shev 
// Storage for the pass's static ID member, which the constructor hands to
// MachineFunctionPass.
74b2e69f52Shev char LoongArchMergeBaseOffsetOpt::ID = 0;
// Registers the pass under DEBUG_TYPE with its human-readable name.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS - confirm.
75b2e69f52Shev INITIALIZE_PASS(LoongArchMergeBaseOffsetOpt, DEBUG_TYPE,
76b2e69f52Shev                 LoongArch_MERGE_BASE_OFFSET_NAME, false, false)
77b2e69f52Shev 
78b2e69f52Shev // Detect either of the patterns:
79b2e69f52Shev //
80b2e69f52Shev // 1. (small/medium):
81b2e69f52Shev //   pcalau12i vreg1, %pc_hi20(s)
82b2e69f52Shev //   addi.d    vreg2, vreg1, %pc_lo12(s)
83b2e69f52Shev //
84b2e69f52Shev // 2. (large):
85b2e69f52Shev //   pcalau12i vreg1, %pc_hi20(s)
86b2e69f52Shev //   addi.d    vreg2, $zero, %pc_lo12(s)
87b2e69f52Shev //   lu32i.d   vreg3, vreg2, %pc64_lo20(s)
88b2e69f52Shev //   lu52i.d   vreg4, vreg3, %pc64_hi12(s)
89b2e69f52Shev //   add.d     vreg5, vreg4, vreg1
90b2e69f52Shev 
91b2e69f52Shev // The pattern is only accepted if:
92b2e69f52Shev //    1) For small and medium pattern, the first instruction has only one use,
93b2e69f52Shev //       which is the ADDI.
94b2e69f52Shev //    2) For large pattern, the first four instructions each have only one use,
95b2e69f52Shev //       and the user of the fourth instruction is ADD.
96b2e69f52Shev //    3) The address operands have the appropriate type, reflecting the
97b2e69f52Shev //       lowering of a global address or constant pool using the pattern.
98b2e69f52Shev //    4) The offset value in the Global Address or Constant Pool is 0.
99b2e69f52Shev bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
100b2e69f52Shev                                                  MachineInstr *&Lo12,
101b2e69f52Shev                                                  MachineInstr *&Lo20,
102b2e69f52Shev                                                  MachineInstr *&Hi12,
103b2e69f52Shev                                                  MachineInstr *&Last) {
104b2e69f52Shev   if (Hi20.getOpcode() != LoongArch::PCALAU12I)
105b2e69f52Shev     return false;
106b2e69f52Shev 
107b2e69f52Shev   const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
108*0288d065SZhaoQi   if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_PCREL_HI)
109b2e69f52Shev     return false;
110b2e69f52Shev 
111b2e69f52Shev   auto isGlobalOrCPIOrBlockAddress = [](const MachineOperand &Op) {
112b2e69f52Shev     return Op.isGlobal() || Op.isCPI() || Op.isBlockAddress();
113b2e69f52Shev   };
114b2e69f52Shev 
115b2e69f52Shev   if (!isGlobalOrCPIOrBlockAddress(Hi20Op1) || Hi20Op1.getOffset() != 0)
116b2e69f52Shev     return false;
117b2e69f52Shev 
118b2e69f52Shev   Register HiDestReg = Hi20.getOperand(0).getReg();
119b2e69f52Shev   if (!MRI->hasOneUse(HiDestReg))
120b2e69f52Shev     return false;
121b2e69f52Shev 
122b2e69f52Shev   MachineInstr *UseInst = &*MRI->use_instr_begin(HiDestReg);
123b2e69f52Shev   if (UseInst->getOpcode() != LoongArch::ADD_D) {
124b2e69f52Shev     Lo12 = UseInst;
125b2e69f52Shev     if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
126b2e69f52Shev         (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
127b2e69f52Shev       return false;
128b2e69f52Shev   } else {
129b2e69f52Shev     assert(ST->is64Bit());
130b2e69f52Shev     Last = UseInst;
131b2e69f52Shev 
132b2e69f52Shev     Register LastOp1Reg = Last->getOperand(1).getReg();
133b2e69f52Shev     if (!LastOp1Reg.isVirtual())
134b2e69f52Shev       return false;
135b2e69f52Shev     Hi12 = MRI->getVRegDef(LastOp1Reg);
136b2e69f52Shev     const MachineOperand &Hi12Op2 = Hi12->getOperand(2);
137b2e69f52Shev     if (Hi12Op2.getTargetFlags() != LoongArchII::MO_PCREL64_HI)
138b2e69f52Shev       return false;
139b2e69f52Shev     if (!isGlobalOrCPIOrBlockAddress(Hi12Op2) || Hi12Op2.getOffset() != 0)
140b2e69f52Shev       return false;
141b2e69f52Shev     if (!MRI->hasOneUse(Hi12->getOperand(0).getReg()))
142b2e69f52Shev       return false;
143b2e69f52Shev 
144b2e69f52Shev     Lo20 = MRI->getVRegDef(Hi12->getOperand(1).getReg());
145b2e69f52Shev     const MachineOperand &Lo20Op2 = Lo20->getOperand(2);
146b2e69f52Shev     if (Lo20Op2.getTargetFlags() != LoongArchII::MO_PCREL64_LO)
147b2e69f52Shev       return false;
148b2e69f52Shev     if (!isGlobalOrCPIOrBlockAddress(Lo20Op2) || Lo20Op2.getOffset() != 0)
149b2e69f52Shev       return false;
150b2e69f52Shev     if (!MRI->hasOneUse(Lo20->getOperand(0).getReg()))
151b2e69f52Shev       return false;
152b2e69f52Shev 
153b2e69f52Shev     Lo12 = MRI->getVRegDef(Lo20->getOperand(1).getReg());
154b2e69f52Shev     if (!MRI->hasOneUse(Lo12->getOperand(0).getReg()))
155b2e69f52Shev       return false;
156b2e69f52Shev   }
157b2e69f52Shev 
158b2e69f52Shev   const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
159b2e69f52Shev   assert(Hi20.getOpcode() == LoongArch::PCALAU12I);
160*0288d065SZhaoQi   if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_PCREL_LO ||
161b2e69f52Shev       !(isGlobalOrCPIOrBlockAddress(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
162b2e69f52Shev       Lo12Op2.getOffset() != 0)
163b2e69f52Shev     return false;
164b2e69f52Shev 
165b2e69f52Shev   if (Hi20Op1.isGlobal()) {
166b2e69f52Shev     LLVM_DEBUG(dbgs() << "  Found lowered global address: "
167b2e69f52Shev                       << *Hi20Op1.getGlobal() << "\n");
168b2e69f52Shev   } else if (Hi20Op1.isBlockAddress()) {
169b2e69f52Shev     LLVM_DEBUG(dbgs() << "  Found lowered basic address: "
170b2e69f52Shev                       << *Hi20Op1.getBlockAddress() << "\n");
171b2e69f52Shev   } else if (Hi20Op1.isCPI()) {
172b2e69f52Shev     LLVM_DEBUG(dbgs() << "  Found lowered constant pool: " << Hi20Op1.getIndex()
173b2e69f52Shev                       << "\n");
174b2e69f52Shev   }
175b2e69f52Shev 
176b2e69f52Shev   return true;
177b2e69f52Shev }
178b2e69f52Shev 
179b2e69f52Shev // Update the offset in Hi20, Lo12, Lo20 and Hi12 instructions.
180b2e69f52Shev // Delete the tail instruction and update all the uses to use the
181b2e69f52Shev // output from Last.
182b2e69f52Shev void LoongArchMergeBaseOffsetOpt::foldOffset(
183b2e69f52Shev     MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
184b2e69f52Shev     MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
185b2e69f52Shev     int64_t Offset) {
186b2e69f52Shev   // Put the offset back in Hi and the Lo
187b2e69f52Shev   Hi20.getOperand(1).setOffset(Offset);
188b2e69f52Shev   Lo12.getOperand(2).setOffset(Offset);
189b2e69f52Shev   if (Lo20 && Hi12) {
190b2e69f52Shev     Lo20->getOperand(2).setOffset(Offset);
191b2e69f52Shev     Hi12->getOperand(2).setOffset(Offset);
192b2e69f52Shev   }
193b2e69f52Shev   // Delete the tail instruction.
194b2e69f52Shev   MachineInstr *Def = Last ? Last : &Lo12;
195b2e69f52Shev   MRI->constrainRegClass(Def->getOperand(0).getReg(),
196b2e69f52Shev                          MRI->getRegClass(Tail.getOperand(0).getReg()));
197b2e69f52Shev   MRI->replaceRegWith(Tail.getOperand(0).getReg(), Def->getOperand(0).getReg());
198b2e69f52Shev   Tail.eraseFromParent();
199b2e69f52Shev   LLVM_DEBUG(dbgs() << "  Merged offset " << Offset << " into base.\n"
200b2e69f52Shev                     << "     " << Hi20 << "     " << Lo12;);
201b2e69f52Shev   if (Lo20 && Hi12) {
202b2e69f52Shev     LLVM_DEBUG(dbgs() << "     " << *Lo20 << "     " << *Hi12;);
203b2e69f52Shev   }
204b2e69f52Shev }
205b2e69f52Shev 
206b2e69f52Shev // Detect patterns for large offsets that are passed into an ADD instruction.
207b2e69f52Shev // If the pattern is found, updates the offset in Hi20, Lo12, Lo20 and Hi12
208b2e69f52Shev // instructions and deletes TailAdd and the instructions that produced the
209b2e69f52Shev // offset.
210b2e69f52Shev //
211b225b15aShev //   (The instructions marked with "!" are not necessarily present)
212b225b15aShev //
213b2e69f52Shev //        Base address lowering is of the form:
214b2e69f52Shev //           Hi20:  pcalau12i vreg1, %pc_hi20(s)
215b225b15aShev //        +- Lo12:  addi.d vreg2, vreg1, %pc_lo12(s)
216b225b15aShev //        |  Lo20:  lu32i.d vreg2, %pc64_lo20(s) !
217b225b15aShev //        +- Hi12:  lu52i.d vreg2, vreg2, %pc64_hi12(s) !
218b225b15aShev //        |
219b225b15aShev //        | The large offset can be one of the forms:
220b225b15aShev //        |
221b225b15aShev //        +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
222b225b15aShev //        |     OffsetHi20: lu12i.w vreg3, 4
223b225b15aShev //        |     OffsetLo12: ori voff, vreg3, 188    ------------------+
224b225b15aShev //        |                                                           |
225b225b15aShev //        +-> 2) Offset that has non zero bits in Hi20 bits only:     |
226b225b15aShev //        |     OffsetHi20: lu12i.w voff, 128       ------------------+
227b225b15aShev //        |                                                           |
228b225b15aShev //        +-> 3) Offset that has non zero bits in Lo20 bits:          |
229b225b15aShev //        |     OffsetHi20: lu12i.w vreg3, 121 !                      |
230b225b15aShev //        |     OffsetLo12: ori voff, vreg3, 122 !                    |
231b225b15aShev //        |     OffsetLo20: lu32i.d voff, 123       ------------------+
232b225b15aShev //        +-> 4) Offset that has non zero bits in Hi12 bits:          |
233b225b15aShev //              OffsetHi20: lu12i.w vreg3, 121 !                      |
234b225b15aShev //              OffsetLo12: ori voff, vreg3, 122 !                    |
235b225b15aShev //              OffsetLo20: lu32i.d vreg3, 123 !                      |
236b225b15aShev //              OffsetHi12: lu52i.d voff, vrg3, 124 ------------------+
237b225b15aShev //                                                                    |
238b225b15aShev //        TailAdd: add.d  vreg4, vreg2, voff       <------------------+
239b225b15aShev //
240b2e69f52Shev bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
241b2e69f52Shev     MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
242b2e69f52Shev     MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
243b2e69f52Shev     Register GAReg) {
244b2e69f52Shev   assert((TailAdd.getOpcode() == LoongArch::ADD_W ||
245b2e69f52Shev           TailAdd.getOpcode() == LoongArch::ADD_D) &&
246b2e69f52Shev          "Expected ADD instruction!");
247b2e69f52Shev   Register Rs = TailAdd.getOperand(1).getReg();
248b2e69f52Shev   Register Rt = TailAdd.getOperand(2).getReg();
249b2e69f52Shev   Register Reg = Rs == GAReg ? Rt : Rs;
250b225b15aShev   SmallVector<MachineInstr *, 4> Instrs;
251b225b15aShev   int64_t Offset = 0;
252b225b15aShev   int64_t Mask = -1;
253b225b15aShev 
254b225b15aShev   // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
255b225b15aShev   for (int i = 0; i < 4; i++) {
256b225b15aShev     // Handle Reg is R0.
257b225b15aShev     if (Reg == LoongArch::R0)
258b225b15aShev       break;
259b2e69f52Shev 
260b2e69f52Shev     // Can't fold if the register has more than one use.
261b2e69f52Shev     if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
262b2e69f52Shev       return false;
263b2e69f52Shev 
264b225b15aShev     MachineInstr *Curr = MRI->getVRegDef(Reg);
265b225b15aShev     if (!Curr)
266b225b15aShev       break;
267b225b15aShev 
268b225b15aShev     switch (Curr->getOpcode()) {
269b225b15aShev     default:
270b225b15aShev       // Can't fold if the instruction opcode is unexpected.
271b225b15aShev       return false;
272b225b15aShev     case LoongArch::ORI: {
273b225b15aShev       MachineOperand ImmOp = Curr->getOperand(2);
274b225b15aShev       if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
275b225b15aShev         return false;
276b225b15aShev       Offset += ImmOp.getImm();
277b225b15aShev       Reg = Curr->getOperand(1).getReg();
278b225b15aShev       Instrs.push_back(Curr);
279b225b15aShev       break;
280b225b15aShev     }
281b225b15aShev     case LoongArch::LU12I_W: {
282b225b15aShev       MachineOperand ImmOp = Curr->getOperand(1);
283b225b15aShev       if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
284b225b15aShev         return false;
285b225b15aShev       Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
286b225b15aShev       Reg = LoongArch::R0;
287b225b15aShev       Instrs.push_back(Curr);
288b225b15aShev       break;
289b225b15aShev     }
290b225b15aShev     case LoongArch::LU32I_D: {
291b225b15aShev       MachineOperand ImmOp = Curr->getOperand(2);
292b225b15aShev       if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
293b225b15aShev         return false;
294b225b15aShev       Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
295b225b15aShev       Mask ^= 0x000FFFFF00000000ULL;
296b225b15aShev       Reg = Curr->getOperand(1).getReg();
297b225b15aShev       Instrs.push_back(Curr);
298b225b15aShev       break;
299b225b15aShev     }
300b225b15aShev     case LoongArch::LU52I_D: {
301b225b15aShev       MachineOperand ImmOp = Curr->getOperand(2);
302b225b15aShev       if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
303b225b15aShev         return false;
304b225b15aShev       Offset += ImmOp.getImm() << 52;
305b225b15aShev       Mask ^= 0xFFF0000000000000ULL;
306b225b15aShev       Reg = Curr->getOperand(1).getReg();
307b225b15aShev       Instrs.push_back(Curr);
308b225b15aShev       break;
309b225b15aShev     }
310b225b15aShev     }
311b2e69f52Shev   }
312b2e69f52Shev 
313b225b15aShev   // Can't fold if the offset is not extracted.
314b225b15aShev   if (!Offset)
315b2e69f52Shev     return false;
316b225b15aShev 
317b2e69f52Shev   foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
318b225b15aShev   LLVM_DEBUG(dbgs() << "  Offset Instrs:\n");
319b225b15aShev   for (auto I : Instrs) {
320b225b15aShev     LLVM_DEBUG(dbgs() << "                 " << *I);
321b225b15aShev     I->eraseFromParent();
322b2e69f52Shev   }
323b225b15aShev 
324b225b15aShev   return true;
325b2e69f52Shev }
326b2e69f52Shev 
327b2e69f52Shev bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
328b2e69f52Shev                                                       MachineInstr &Lo12,
329b2e69f52Shev                                                       MachineInstr *&Lo20,
330b2e69f52Shev                                                       MachineInstr *&Hi12,
331b2e69f52Shev                                                       MachineInstr *&Last) {
332b2e69f52Shev   Register DestReg =
333b2e69f52Shev       Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
334b2e69f52Shev 
335b2e69f52Shev   // Look for arithmetic instructions we can get an offset from.
336b2e69f52Shev   // We might be able to remove the arithmetic instructions by folding the
337b2e69f52Shev   // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I).
338b2e69f52Shev   if (!MRI->hasOneUse(DestReg))
339b2e69f52Shev     return false;
340b2e69f52Shev 
341b2e69f52Shev   // DestReg has only one use.
342b2e69f52Shev   MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
343b2e69f52Shev   switch (Tail.getOpcode()) {
344b2e69f52Shev   default:
345b2e69f52Shev     LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
346b2e69f52Shev                       << Tail);
347b2e69f52Shev     break;
348b2e69f52Shev   case LoongArch::ADDI_W:
349b2e69f52Shev     if (ST->is64Bit())
350b2e69f52Shev       return false;
351b2e69f52Shev     [[fallthrough]];
352b2e69f52Shev   case LoongArch::ADDI_D:
353b2e69f52Shev   case LoongArch::ADDU16I_D: {
354b2e69f52Shev     // Offset is simply an immediate operand.
355b2e69f52Shev     int64_t Offset = Tail.getOperand(2).getImm();
356b2e69f52Shev     if (Tail.getOpcode() == LoongArch::ADDU16I_D)
357b2e69f52Shev       Offset = SignExtend64<32>(Offset << 16);
358b2e69f52Shev 
359b2e69f52Shev     // We might have two ADDIs in a row.
360b2e69f52Shev     Register TailDestReg = Tail.getOperand(0).getReg();
361b2e69f52Shev     if (MRI->hasOneUse(TailDestReg)) {
362b2e69f52Shev       MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
363b2e69f52Shev       if (ST->is64Bit() && TailTail.getOpcode() == LoongArch::ADDI_W)
364b2e69f52Shev         return false;
365b2e69f52Shev       if (TailTail.getOpcode() == LoongArch::ADDI_W ||
366b2e69f52Shev           TailTail.getOpcode() == LoongArch::ADDI_D) {
367b2e69f52Shev         Offset += TailTail.getOperand(2).getImm();
368b2e69f52Shev         LLVM_DEBUG(dbgs() << "  Offset Instrs: " << Tail << TailTail);
369b2e69f52Shev         foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailTail, Offset);
370b2e69f52Shev         Tail.eraseFromParent();
371b2e69f52Shev         return true;
372b2e69f52Shev       }
373b2e69f52Shev     }
374b2e69f52Shev 
375b2e69f52Shev     LLVM_DEBUG(dbgs() << "  Offset Instr: " << Tail);
376b2e69f52Shev     foldOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, Offset);
377b2e69f52Shev     return true;
378b2e69f52Shev   }
379b2e69f52Shev   case LoongArch::ADD_W:
380b2e69f52Shev     if (ST->is64Bit())
381b2e69f52Shev       return false;
382b2e69f52Shev     [[fallthrough]];
383b2e69f52Shev   case LoongArch::ADD_D:
384b2e69f52Shev     // The offset is too large to fit in the immediate field of ADDI.
385b2e69f52Shev     return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
386b2e69f52Shev     break;
387b2e69f52Shev   }
388b2e69f52Shev 
389b2e69f52Shev   return false;
390b2e69f52Shev }
391b2e69f52Shev 
392b2e69f52Shev // Memory access opcode mapping for transforms.
393b2e69f52Shev static unsigned getNewOpc(unsigned Op, bool isLarge) {
394b2e69f52Shev   switch (Op) {
395b2e69f52Shev   case LoongArch::LD_B:
396b2e69f52Shev     return isLarge ? LoongArch::LDX_B : LoongArch::LD_B;
397b2e69f52Shev   case LoongArch::LD_H:
398b2e69f52Shev     return isLarge ? LoongArch::LDX_H : LoongArch::LD_H;
399b2e69f52Shev   case LoongArch::LD_W:
400b2e69f52Shev   case LoongArch::LDPTR_W:
401b2e69f52Shev     return isLarge ? LoongArch::LDX_W : LoongArch::LD_W;
402b2e69f52Shev   case LoongArch::LD_D:
403b2e69f52Shev   case LoongArch::LDPTR_D:
404b2e69f52Shev     return isLarge ? LoongArch::LDX_D : LoongArch::LD_D;
405b2e69f52Shev   case LoongArch::LD_BU:
406b2e69f52Shev     return isLarge ? LoongArch::LDX_BU : LoongArch::LD_BU;
407b2e69f52Shev   case LoongArch::LD_HU:
408b2e69f52Shev     return isLarge ? LoongArch::LDX_HU : LoongArch::LD_HU;
409b2e69f52Shev   case LoongArch::LD_WU:
410b2e69f52Shev     return isLarge ? LoongArch::LDX_WU : LoongArch::LD_WU;
411b2e69f52Shev   case LoongArch::FLD_S:
412b2e69f52Shev     return isLarge ? LoongArch::FLDX_S : LoongArch::FLD_S;
413b2e69f52Shev   case LoongArch::FLD_D:
414b2e69f52Shev     return isLarge ? LoongArch::FLDX_D : LoongArch::FLD_D;
415985d64b0Shev   case LoongArch::VLD:
416985d64b0Shev     return isLarge ? LoongArch::VLDX : LoongArch::VLD;
417985d64b0Shev   case LoongArch::XVLD:
418985d64b0Shev     return isLarge ? LoongArch::XVLDX : LoongArch::XVLD;
419985d64b0Shev   case LoongArch::VLDREPL_B:
420985d64b0Shev     return LoongArch::VLDREPL_B;
421985d64b0Shev   case LoongArch::XVLDREPL_B:
422985d64b0Shev     return LoongArch::XVLDREPL_B;
423b2e69f52Shev   case LoongArch::ST_B:
424b2e69f52Shev     return isLarge ? LoongArch::STX_B : LoongArch::ST_B;
425b2e69f52Shev   case LoongArch::ST_H:
426b2e69f52Shev     return isLarge ? LoongArch::STX_H : LoongArch::ST_H;
427b2e69f52Shev   case LoongArch::ST_W:
428b2e69f52Shev   case LoongArch::STPTR_W:
429b2e69f52Shev     return isLarge ? LoongArch::STX_W : LoongArch::ST_W;
430b2e69f52Shev   case LoongArch::ST_D:
431b2e69f52Shev   case LoongArch::STPTR_D:
432b2e69f52Shev     return isLarge ? LoongArch::STX_D : LoongArch::ST_D;
433b2e69f52Shev   case LoongArch::FST_S:
434b2e69f52Shev     return isLarge ? LoongArch::FSTX_S : LoongArch::FST_S;
435b2e69f52Shev   case LoongArch::FST_D:
436b2e69f52Shev     return isLarge ? LoongArch::FSTX_D : LoongArch::FST_D;
437985d64b0Shev   case LoongArch::VST:
438985d64b0Shev     return isLarge ? LoongArch::VSTX : LoongArch::VST;
439985d64b0Shev   case LoongArch::XVST:
440985d64b0Shev     return isLarge ? LoongArch::XVSTX : LoongArch::XVST;
441b2e69f52Shev   default:
442b2e69f52Shev     llvm_unreachable("Unexpected opcode for replacement");
443b2e69f52Shev   }
444b2e69f52Shev }
445b2e69f52Shev 
446b2e69f52Shev bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
447b2e69f52Shev                                                     MachineInstr &Lo12,
448b2e69f52Shev                                                     MachineInstr *&Lo20,
449b2e69f52Shev                                                     MachineInstr *&Hi12,
450b2e69f52Shev                                                     MachineInstr *&Last) {
451b2e69f52Shev   Register DestReg =
452b2e69f52Shev       Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
453b2e69f52Shev 
454b2e69f52Shev   // If all the uses are memory ops with the same offset, we can transform:
455b2e69f52Shev   //
456b2e69f52Shev   // 1. (small/medium):
457b2e69f52Shev   //   pcalau12i vreg1, %pc_hi20(s)
458b2e69f52Shev   //   addi.d    vreg2, vreg1, %pc_lo12(s)
459b2e69f52Shev   //   ld.w      vreg3, 8(vreg2)
460b2e69f52Shev   //
461b2e69f52Shev   //   =>
462b2e69f52Shev   //
463b2e69f52Shev   //   pcalau12i vreg1, %pc_hi20(s+8)
464b2e69f52Shev   //   ld.w      vreg3, vreg1, %pc_lo12(s+8)(vreg1)
465b2e69f52Shev   //
466b2e69f52Shev   // 2. (large):
467b2e69f52Shev   //   pcalau12i vreg1, %pc_hi20(s)
468b2e69f52Shev   //   addi.d    vreg2, $zero, %pc_lo12(s)
469b2e69f52Shev   //   lu32i.d   vreg3, vreg2, %pc64_lo20(s)
470b2e69f52Shev   //   lu52i.d   vreg4, vreg3, %pc64_hi12(s)
471b2e69f52Shev   //   add.d     vreg5, vreg4, vreg1
472b2e69f52Shev   //   ld.w      vreg6, 8(vreg5)
473b2e69f52Shev   //
474b2e69f52Shev   //   =>
475b2e69f52Shev   //
476b2e69f52Shev   //   pcalau12i vreg1, %pc_hi20(s+8)
477b2e69f52Shev   //   addi.d    vreg2, $zero, %pc_lo12(s+8)
478b2e69f52Shev   //   lu32i.d   vreg3, vreg2, %pc64_lo20(s+8)
479b2e69f52Shev   //   lu52i.d   vreg4, vreg3, %pc64_hi12(s+8)
480b2e69f52Shev   //   ldx.w     vreg6, vreg4, vreg1
481b2e69f52Shev 
482b2e69f52Shev   std::optional<int64_t> CommonOffset;
483b2e69f52Shev   DenseMap<const MachineInstr *, SmallVector<unsigned>>
484b2e69f52Shev       InlineAsmMemoryOpIndexesMap;
485b2e69f52Shev   for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
486b2e69f52Shev     switch (UseMI.getOpcode()) {
487b2e69f52Shev     default:
488b2e69f52Shev       LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
489b2e69f52Shev       return false;
490985d64b0Shev     case LoongArch::VLDREPL_B:
491985d64b0Shev     case LoongArch::XVLDREPL_B:
492985d64b0Shev       // We can't do this for large pattern.
493985d64b0Shev       if (Last)
494985d64b0Shev         return false;
495985d64b0Shev       [[fallthrough]];
496b2e69f52Shev     case LoongArch::LD_B:
497b2e69f52Shev     case LoongArch::LD_H:
498b2e69f52Shev     case LoongArch::LD_W:
499b2e69f52Shev     case LoongArch::LD_D:
500b2e69f52Shev     case LoongArch::LD_BU:
501b2e69f52Shev     case LoongArch::LD_HU:
502b2e69f52Shev     case LoongArch::LD_WU:
503b2e69f52Shev     case LoongArch::LDPTR_W:
504b2e69f52Shev     case LoongArch::LDPTR_D:
505b2e69f52Shev     case LoongArch::FLD_S:
506b2e69f52Shev     case LoongArch::FLD_D:
507985d64b0Shev     case LoongArch::VLD:
508985d64b0Shev     case LoongArch::XVLD:
509b2e69f52Shev     case LoongArch::ST_B:
510b2e69f52Shev     case LoongArch::ST_H:
511b2e69f52Shev     case LoongArch::ST_W:
512b2e69f52Shev     case LoongArch::ST_D:
513b2e69f52Shev     case LoongArch::STPTR_W:
514b2e69f52Shev     case LoongArch::STPTR_D:
515b2e69f52Shev     case LoongArch::FST_S:
516985d64b0Shev     case LoongArch::FST_D:
517985d64b0Shev     case LoongArch::VST:
518985d64b0Shev     case LoongArch::XVST: {
519b2e69f52Shev       if (UseMI.getOperand(1).isFI())
520b2e69f52Shev         return false;
521b2e69f52Shev       // Register defined by Lo should not be the value register.
522b2e69f52Shev       if (DestReg == UseMI.getOperand(0).getReg())
523b2e69f52Shev         return false;
524b2e69f52Shev       assert(DestReg == UseMI.getOperand(1).getReg() &&
525b2e69f52Shev              "Expected base address use");
526b2e69f52Shev       // All load/store instructions must use the same offset.
527b2e69f52Shev       int64_t Offset = UseMI.getOperand(2).getImm();
528b2e69f52Shev       if (CommonOffset && Offset != CommonOffset)
529b2e69f52Shev         return false;
530b2e69f52Shev       CommonOffset = Offset;
531b2e69f52Shev       break;
532b2e69f52Shev     }
533b2e69f52Shev     case LoongArch::INLINEASM:
534b2e69f52Shev     case LoongArch::INLINEASM_BR: {
535b2e69f52Shev       // We can't do this for large pattern.
536b2e69f52Shev       if (Last)
537b2e69f52Shev         return false;
538b2e69f52Shev       SmallVector<unsigned> InlineAsmMemoryOpIndexes;
539b2e69f52Shev       unsigned NumOps = 0;
540b2e69f52Shev       for (unsigned I = InlineAsm::MIOp_FirstOperand;
541b2e69f52Shev            I < UseMI.getNumOperands(); I += 1 + NumOps) {
542b2e69f52Shev         const MachineOperand &FlagsMO = UseMI.getOperand(I);
543b2e69f52Shev         // Should be an imm.
544b2e69f52Shev         if (!FlagsMO.isImm())
545b2e69f52Shev           continue;
546b2e69f52Shev 
547b2e69f52Shev         const InlineAsm::Flag Flags(FlagsMO.getImm());
548b2e69f52Shev         NumOps = Flags.getNumOperandRegisters();
549b2e69f52Shev 
550b2e69f52Shev         // Memory constraints have two operands.
551b2e69f52Shev         if (NumOps != 2 || !Flags.isMemKind()) {
552b2e69f52Shev           // If the register is used by something other than a memory constraint,
553b2e69f52Shev           // we should not fold.
554b2e69f52Shev           for (unsigned J = 0; J < NumOps; ++J) {
555b2e69f52Shev             const MachineOperand &MO = UseMI.getOperand(I + 1 + J);
556b2e69f52Shev             if (MO.isReg() && MO.getReg() == DestReg)
557b2e69f52Shev               return false;
558b2e69f52Shev           }
559b2e69f52Shev           continue;
560b2e69f52Shev         }
561b2e69f52Shev 
562b2e69f52Shev         // We can only do this for constraint m.
563b2e69f52Shev         if (Flags.getMemoryConstraintID() != InlineAsm::ConstraintCode::m)
564b2e69f52Shev           return false;
565b2e69f52Shev 
566b2e69f52Shev         const MachineOperand &AddrMO = UseMI.getOperand(I + 1);
567b2e69f52Shev         if (!AddrMO.isReg() || AddrMO.getReg() != DestReg)
568b2e69f52Shev           continue;
569b2e69f52Shev 
570b2e69f52Shev         const MachineOperand &OffsetMO = UseMI.getOperand(I + 2);
571b2e69f52Shev         if (!OffsetMO.isImm())
572b2e69f52Shev           continue;
573b2e69f52Shev 
574b2e69f52Shev         // All inline asm memory operands must use the same offset.
575b2e69f52Shev         int64_t Offset = OffsetMO.getImm();
576b2e69f52Shev         if (CommonOffset && Offset != CommonOffset)
577b2e69f52Shev           return false;
578b2e69f52Shev         CommonOffset = Offset;
579b2e69f52Shev         InlineAsmMemoryOpIndexes.push_back(I + 1);
580b2e69f52Shev       }
581b2e69f52Shev       InlineAsmMemoryOpIndexesMap.insert(
582b2e69f52Shev           std::make_pair(&UseMI, InlineAsmMemoryOpIndexes));
583b2e69f52Shev       break;
584b2e69f52Shev     }
585b2e69f52Shev     }
586b2e69f52Shev   }
587b2e69f52Shev 
588b2e69f52Shev   // We found a common offset.
589b2e69f52Shev   // Update the offsets in global address lowering.
590b2e69f52Shev   // We may have already folded some arithmetic so we need to add to any
591b2e69f52Shev   // existing offset.
592b2e69f52Shev   int64_t NewOffset = Hi20.getOperand(1).getOffset() + *CommonOffset;
593b2e69f52Shev   // LA32 ignores the upper 32 bits.
594b2e69f52Shev   if (!ST->is64Bit())
595b2e69f52Shev     NewOffset = SignExtend64<32>(NewOffset);
596b2e69f52Shev   // We can only fold simm32 offsets.
597b2e69f52Shev   if (!isInt<32>(NewOffset))
598b2e69f52Shev     return false;
599b2e69f52Shev 
600*0288d065SZhaoQi   // If optimized by this pass successfully, MO_RELAX bitmask target-flag should
601*0288d065SZhaoQi   // be removed from the code sequence.
602*0288d065SZhaoQi   //
603*0288d065SZhaoQi   // For example:
604*0288d065SZhaoQi   //   pcalau12i $a0, %pc_hi20(symbol)
605*0288d065SZhaoQi   //   addi.d $a0, $a0, %pc_lo12(symbol)
606*0288d065SZhaoQi   //   ld.w $a0, $a0, 0
607*0288d065SZhaoQi   //
608*0288d065SZhaoQi   //   =>
609*0288d065SZhaoQi   //
610*0288d065SZhaoQi   //   pcalau12i $a0, %pc_hi20(symbol)
611*0288d065SZhaoQi   //   ld.w $a0, $a0, %pc_lo12(symbol)
612*0288d065SZhaoQi   //
  // The code sequence before this optimization can be relaxed by the linker,
  // but after being optimized it cannot be relaxed any more. So the MO_RELAX
  // flag should not be carried by them.
616b2e69f52Shev   Hi20.getOperand(1).setOffset(NewOffset);
617*0288d065SZhaoQi   Hi20.getOperand(1).setTargetFlags(
618*0288d065SZhaoQi       LoongArchII::getDirectFlags(Hi20.getOperand(1)));
619b2e69f52Shev   MachineOperand &ImmOp = Lo12.getOperand(2);
620b2e69f52Shev   ImmOp.setOffset(NewOffset);
621*0288d065SZhaoQi   ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
622b2e69f52Shev   if (Lo20 && Hi12) {
623b2e69f52Shev     Lo20->getOperand(2).setOffset(NewOffset);
624b2e69f52Shev     Hi12->getOperand(2).setOffset(NewOffset);
625b2e69f52Shev   }
626b2e69f52Shev 
627b2e69f52Shev   // Update the immediate in the load/store instructions to add the offset.
628b2e69f52Shev   const LoongArchInstrInfo &TII = *ST->getInstrInfo();
629b2e69f52Shev   for (MachineInstr &UseMI :
630b2e69f52Shev        llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
631b2e69f52Shev     if (UseMI.getOpcode() == LoongArch::INLINEASM ||
632b2e69f52Shev         UseMI.getOpcode() == LoongArch::INLINEASM_BR) {
633b2e69f52Shev       auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI];
634b2e69f52Shev       for (unsigned I : InlineAsmMemoryOpIndexes) {
635b2e69f52Shev         MachineOperand &MO = UseMI.getOperand(I + 1);
636b2e69f52Shev         switch (ImmOp.getType()) {
637b2e69f52Shev         case MachineOperand::MO_GlobalAddress:
638b2e69f52Shev           MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
639*0288d065SZhaoQi                         LoongArchII::getDirectFlags(ImmOp));
640b2e69f52Shev           break;
641b2e69f52Shev         case MachineOperand::MO_MCSymbol:
642*0288d065SZhaoQi           MO.ChangeToMCSymbol(ImmOp.getMCSymbol(),
643*0288d065SZhaoQi                               LoongArchII::getDirectFlags(ImmOp));
644b2e69f52Shev           MO.setOffset(ImmOp.getOffset());
645b2e69f52Shev           break;
646b2e69f52Shev         case MachineOperand::MO_BlockAddress:
647b2e69f52Shev           MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
648*0288d065SZhaoQi                         LoongArchII::getDirectFlags(ImmOp));
649b2e69f52Shev           break;
650b2e69f52Shev         default:
651b2e69f52Shev           report_fatal_error("unsupported machine operand type");
652b2e69f52Shev           break;
653b2e69f52Shev         }
654b2e69f52Shev       }
655b2e69f52Shev     } else {
656b2e69f52Shev       UseMI.setDesc(TII.get(getNewOpc(UseMI.getOpcode(), Last)));
657b2e69f52Shev       if (Last) {
658b2e69f52Shev         UseMI.removeOperand(2);
659b2e69f52Shev         UseMI.removeOperand(1);
660b2e69f52Shev         UseMI.addOperand(Last->getOperand(1));
661b2e69f52Shev         UseMI.addOperand(Last->getOperand(2));
662b2e69f52Shev         UseMI.getOperand(1).setIsKill(false);
663b2e69f52Shev         UseMI.getOperand(2).setIsKill(false);
664b2e69f52Shev       } else {
665b2e69f52Shev         UseMI.removeOperand(2);
666b2e69f52Shev         UseMI.addOperand(ImmOp);
667b2e69f52Shev       }
668b2e69f52Shev     }
669b2e69f52Shev   }
670b2e69f52Shev 
671b2e69f52Shev   if (Last) {
672b2e69f52Shev     Last->eraseFromParent();
673b2e69f52Shev     return true;
674b2e69f52Shev   }
675b2e69f52Shev 
676b2e69f52Shev   MRI->replaceRegWith(Lo12.getOperand(0).getReg(), Hi20.getOperand(0).getReg());
677b2e69f52Shev   Lo12.eraseFromParent();
678b2e69f52Shev   return true;
679b2e69f52Shev }
680b2e69f52Shev 
681b2e69f52Shev bool LoongArchMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
682b2e69f52Shev   if (skipFunction(Fn.getFunction()))
683b2e69f52Shev     return false;
684b2e69f52Shev 
685b2e69f52Shev   ST = &Fn.getSubtarget<LoongArchSubtarget>();
686b2e69f52Shev 
687b2e69f52Shev   bool MadeChange = false;
688b2e69f52Shev   MRI = &Fn.getRegInfo();
689b2e69f52Shev   for (MachineBasicBlock &MBB : Fn) {
690b2e69f52Shev     LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
691b2e69f52Shev     for (MachineInstr &Hi20 : MBB) {
692b2e69f52Shev       MachineInstr *Lo12 = nullptr;
693b2e69f52Shev       MachineInstr *Lo20 = nullptr;
694b2e69f52Shev       MachineInstr *Hi12 = nullptr;
695b2e69f52Shev       MachineInstr *Last = nullptr;
696b2e69f52Shev       if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
697b2e69f52Shev         continue;
698b2e69f52Shev       MadeChange |= detectAndFoldOffset(Hi20, *Lo12, Lo20, Hi12, Last);
699b2e69f52Shev       MadeChange |= foldIntoMemoryOps(Hi20, *Lo12, Lo20, Hi12, Last);
700b2e69f52Shev     }
701b2e69f52Shev   }
702b2e69f52Shev 
703b2e69f52Shev   return MadeChange;
704b2e69f52Shev }
705b2e69f52Shev 
706b2e69f52Shev /// Returns an instance of the Merge Base Offset Optimization pass.
707b2e69f52Shev FunctionPass *llvm::createLoongArchMergeBaseOffsetOptPass() {
708b2e69f52Shev   return new LoongArchMergeBaseOffsetOpt();
709b2e69f52Shev }
710