//===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Merge the offset of address calculation into the offset field
// of instructions in a global address lowering sequence.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>
#include <set>
using namespace llvm;

#define DEBUG_TYPE "riscv-merge-base-offset"
#define RISCV_MERGE_BASE_OFFSET_NAME "RISCV Merge Base Offset"
namespace {

struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
private:
  const RISCVSubtarget *ST = nullptr;

public:
  static char ID;
  bool runOnMachineFunction(MachineFunction &Fn) override;
  bool detectFoldable(MachineInstr &Hi, MachineInstr *&Lo);

  bool detectAndFoldOffset(MachineInstr &Hi, MachineInstr &Lo);
  void foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
                  int64_t Offset);
  bool foldLargeOffset(MachineInstr &Hi, MachineInstr &Lo,
                       MachineInstr &TailAdd, Register GSReg);
  bool foldShiftedOffset(MachineInstr &Hi, MachineInstr &Lo,
                         MachineInstr &TailShXAdd, Register GSReg);

  bool foldIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo);

  RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::IsSSA);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return RISCV_MERGE_BASE_OFFSET_NAME;
  }

private:
  MachineRegisterInfo *MRI;
};
} // end anonymous namespace

char RISCVMergeBaseOffsetOpt::ID = 0;
INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
                RISCV_MERGE_BASE_OFFSET_NAME, false, false)

// Detect either of the patterns:
//
// 1. (medlow pattern):
//   lui   vreg1, %hi(s)
//   addi  vreg2, vreg1, %lo(s)
//
// 2. (medany pattern):
// .Lpcrel_hi1:
//   auipc vreg1, %pcrel_hi(s)
//   addi  vreg2, vreg1, %pcrel_lo(.Lpcrel_hi1)
//
// The pattern is only accepted if:
//    1) The first instruction has only one use, which is the ADDI.
//    2) The address operands have the appropriate type, reflecting the
//       lowering of a global address or constant pool using medlow or medany.
//    3) The offset value in the Global Address or Constant Pool is 0.
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
                                             MachineInstr *&Lo) {
  if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
    return false;

  const MachineOperand &HiOp1 = Hi.getOperand(1);
  unsigned ExpectedFlags =
      Hi.getOpcode() == RISCV::AUIPC ? RISCVII::MO_PCREL_HI : RISCVII::MO_HI;
  if (HiOp1.getTargetFlags() != ExpectedFlags)
    return false;

  if (!(HiOp1.isGlobal() || HiOp1.isCPI()) || HiOp1.getOffset() != 0)
    return false;

  Register HiDestReg = Hi.getOperand(0).getReg();
  if (!MRI->hasOneUse(HiDestReg))
    return false;

  Lo = &*MRI->use_instr_begin(HiDestReg);
  if (Lo->getOpcode() != RISCV::ADDI)
    return false;

  const MachineOperand &LoOp2 = Lo->getOperand(2);
  if (Hi.getOpcode() == RISCV::LUI) {
    if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
        !(LoOp2.isGlobal() || LoOp2.isCPI()) || LoOp2.getOffset() != 0)
      return false;
  } else {
    assert(Hi.getOpcode() == RISCV::AUIPC);
    if (LoOp2.getTargetFlags() != RISCVII::MO_PCREL_LO ||
        LoOp2.getType() != MachineOperand::MO_MCSymbol)
      return false;
  }

  if (HiOp1.isGlobal()) {
    LLVM_DEBUG(dbgs() << " Found lowered global address: "
                      << *HiOp1.getGlobal() << "\n");
  } else {
    assert(HiOp1.isCPI());
    LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << HiOp1.getIndex()
                      << "\n");
  }

  return true;
}

// Update the offset in Hi and Lo instructions.
// Delete the tail instruction and update all the uses to use the
// output from Lo.
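// For example (illustrative), folding Offset = 8 in the medlow form rewrites:
//   Hi:   lui  vreg1, %hi(sym)         --->  lui  vreg1, %hi(sym+8)
//   Lo:   addi vreg2, vreg1, %lo(sym)  --->  addi vreg2, vreg1, %lo(sym+8)
//   Tail: addi vreg3, vreg2, 8         --->  (erased; uses of vreg3 -> vreg2)
// For the medany (AUIPC) form only Hi carries the folded offset.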
void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
                                         MachineInstr &Tail, int64_t Offset) {
  assert(isInt<32>(Offset) && "Unexpected offset");
  // Put the offset back in Hi and the Lo instructions.
  Hi.getOperand(1).setOffset(Offset);
  if (Hi.getOpcode() != RISCV::AUIPC)
    Lo.getOperand(2).setOffset(Offset);
  // Delete the tail instruction.
  MRI->replaceRegWith(Tail.getOperand(0).getReg(), Lo.getOperand(0).getReg());
  Tail.eraseFromParent();
  LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
                    << " " << Hi << " " << Lo;);
}

// Detect patterns for large offsets that are passed into an ADD instruction.
// If the pattern is found, updates the offset in Hi and Lo instructions
// and deletes TailAdd and the instructions that produced the offset.
//
// Base address lowering is of the form:
//   Hi:  lui   vreg1, %hi(s)
//   Lo:  addi  vreg2, vreg1, %lo(s)
//   /                                          \
//  /                                            \
// /                                              \
// /  The large offset can be of two forms:        \
//  1) Offset that has non-zero bits in lower     2) Offset that has non-zero
//     12 bits and upper 20 bits                     bits in upper 20 bits only
//  OffsetLui:  lui   vreg3, 4
//  OffsetTail: addi  voff, vreg3, 188             OffsetTail: lui  voff, 128
//                     \                                      /
//                      \                                    /
//                       \                                  /
//                        \                                /
//                         TailAdd: add  vreg4, vreg2, voff
bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
                                              MachineInstr &Lo,
                                              MachineInstr &TailAdd,
                                              Register GAReg) {
  assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
  Register Rs = TailAdd.getOperand(1).getReg();
  Register Rt = TailAdd.getOperand(2).getReg();
  Register Reg = Rs == GAReg ? Rt : Rs;

  // Can't fold if the register has more than one use.
  if (!MRI->hasOneUse(Reg))
    return false;
  // This can point to an ADDI(W) or a LUI:
  MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
  if (OffsetTail.getOpcode() == RISCV::ADDI ||
      OffsetTail.getOpcode() == RISCV::ADDIW) {
    // The offset value has non-zero bits in both %hi and %lo parts.
    // Detect an ADDI that feeds from a LUI instruction.
    MachineOperand &AddiImmOp = OffsetTail.getOperand(2);
    if (AddiImmOp.getTargetFlags() != RISCVII::MO_None)
      return false;
    int64_t OffLo = AddiImmOp.getImm();
    MachineInstr &OffsetLui =
        *MRI->getVRegDef(OffsetTail.getOperand(1).getReg());
    MachineOperand &LuiImmOp = OffsetLui.getOperand(1);
    if (OffsetLui.getOpcode() != RISCV::LUI ||
        LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
        !MRI->hasOneUse(OffsetLui.getOperand(0).getReg()))
      return false;
    int64_t Offset = SignExtend64<32>(LuiImmOp.getImm() << 12);
    Offset += OffLo;
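    // e.g. (illustrative): lui vreg3, 4 followed by addi voff, vreg3, 188, as
    // in the diagram above, materializes (4 << 12) + 188 = 16572 as the
    // folded offset.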
    // RV32 ignores the upper 32 bits. ADDIW sign extends the result.
    if (!ST->is64Bit() || OffsetTail.getOpcode() == RISCV::ADDIW)
      Offset = SignExtend64<32>(Offset);
    // We can only fold simm32 offsets.
    if (!isInt<32>(Offset))
      return false;
    LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
                      << " " << OffsetLui);
    foldOffset(Hi, Lo, TailAdd, Offset);
    OffsetTail.eraseFromParent();
    OffsetLui.eraseFromParent();
    return true;
  } else if (OffsetTail.getOpcode() == RISCV::LUI) {
    // The offset value has all zero bits in the lower 12 bits. Only LUI
    // exists.
    LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
    int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
    foldOffset(Hi, Lo, TailAdd, Offset);
    OffsetTail.eraseFromParent();
    return true;
  }
  return false;
}

// Detect patterns for offsets that are passed into a SHXADD instruction.
// The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, or
// simm15 respectively.
// The constant is created with addi voff, x0, C, and shXadd is used to
// insert the trailing zeros and do the addition.
// If the pattern is found, updates the offset in Hi and Lo instructions
// and deletes TailShXAdd and the instructions that produced the offset.
//
//   Hi:         lui     vreg1, %hi(s)
//   Lo:         addi    vreg2, vreg1, %lo(s)
//   OffsetTail: addi    voff, x0, C
//   TailAdd:    shXadd  vreg4, voff, vreg2
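// For example (illustrative), with C = 100 and SH2ADD the folded offset is
// 100 << 2 = 400, i.e. the sequence addresses s + 400.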
bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr &Hi,
                                                MachineInstr &Lo,
                                                MachineInstr &TailShXAdd,
                                                Register GAReg) {
  assert((TailShXAdd.getOpcode() == RISCV::SH1ADD ||
          TailShXAdd.getOpcode() == RISCV::SH2ADD ||
          TailShXAdd.getOpcode() == RISCV::SH3ADD) &&
         "Expected SHXADD instruction!");

  // The first source is the shifted operand.
  Register Rs1 = TailShXAdd.getOperand(1).getReg();

  if (GAReg != TailShXAdd.getOperand(2).getReg())
    return false;

  // Can't fold if the register has more than one use.
  if (!MRI->hasOneUse(Rs1))
    return false;
  // This can point to an ADDI X0, C.
  MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1);
  if (OffsetTail.getOpcode() != RISCV::ADDI)
    return false;
  if (!OffsetTail.getOperand(1).isReg() ||
      OffsetTail.getOperand(1).getReg() != RISCV::X0 ||
      !OffsetTail.getOperand(2).isImm())
    return false;

  int64_t Offset = OffsetTail.getOperand(2).getImm();
  assert(isInt<12>(Offset) && "Unexpected offset");

  unsigned ShAmt;
  switch (TailShXAdd.getOpcode()) {
  default: llvm_unreachable("Unexpected opcode");
  case RISCV::SH1ADD: ShAmt = 1; break;
  case RISCV::SH2ADD: ShAmt = 2; break;
  case RISCV::SH3ADD: ShAmt = 3; break;
  }

  Offset = (uint64_t)Offset << ShAmt;

  LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
  foldOffset(Hi, Lo, TailShXAdd, Offset);
  OffsetTail.eraseFromParent();
  return true;
}

bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
                                                  MachineInstr &Lo) {
  Register DestReg = Lo.getOperand(0).getReg();

  // Look for arithmetic instructions we can get an offset from.
  // We might be able to remove the arithmetic instructions by folding the
  // offset into the LUI+ADDI.
  if (!MRI->hasOneUse(DestReg))
    return false;

  // Lo has only one use.
  MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
  switch (Tail.getOpcode()) {
  default:
    LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
                      << Tail);
    break;
  case RISCV::ADDI: {
    // Offset is simply an immediate operand.
    int64_t Offset = Tail.getOperand(2).getImm();

    // We might have two ADDIs in a row.
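    // Illustrative example: ADDI 2047 followed by ADDI 1 folds as a combined
    // offset of 2048.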
    Register TailDestReg = Tail.getOperand(0).getReg();
    if (MRI->hasOneUse(TailDestReg)) {
      MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
      if (TailTail.getOpcode() == RISCV::ADDI) {
        Offset += TailTail.getOperand(2).getImm();
        LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
        foldOffset(Hi, Lo, TailTail, Offset);
        Tail.eraseFromParent();
        return true;
      }
    }

    LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
    foldOffset(Hi, Lo, Tail, Offset);
    return true;
  }
  case RISCV::ADD:
    // The offset is too large to fit in the immediate field of ADDI.
    // This can be in two forms:
    // 1) LUI hi_offset followed by:
    //    ADDI lo_offset
    //    This happens in case the offset has non-zero bits in
    //    both hi 20 and lo 12 bits.
    // 2) LUI (offset20)
    //    This happens in case the lower 12 bits of the offset are zeros.
    return foldLargeOffset(Hi, Lo, Tail, DestReg);
  case RISCV::SH1ADD:
  case RISCV::SH2ADD:
  case RISCV::SH3ADD:
    // The offset is too large to fit in the immediate field of ADDI.
    // It may be encoded as (SH1ADD/SH2ADD/SH3ADD (ADDI X0, C), DestReg).
    return foldShiftedOffset(Hi, Lo, Tail, DestReg);
  }

  return false;
}

bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
                                                MachineInstr &Lo) {
  Register DestReg = Lo.getOperand(0).getReg();

  // If all the uses are memory ops with the same offset, we can transform:
  //
  // 1. (medlow pattern):
  //   Hi:   lui vreg1, %hi(foo)           --->  lui vreg1, %hi(foo+8)
  //   Lo:   addi vreg2, vreg1, %lo(foo)   --->  lw vreg3, %lo(foo+8)(vreg1)
  //   Tail: lw vreg3, 8(vreg2)
  //
  // 2. (medany pattern):
  //   Hi:   1:auipc vreg1, %pcrel_hi(foo)      --->  auipc vreg1, %pcrel_hi(foo+8)
  //   Lo:   addi vreg2, vreg1, %pcrel_lo(1b)   --->  lw vreg3, %pcrel_lo(1b)(vreg1)
  //   Tail: lw vreg3, 8(vreg2)

  std::optional<int64_t> CommonOffset;
  for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
    switch (UseMI.getOpcode()) {
    default:
      LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
      return false;
    case RISCV::LB:
    case RISCV::LH:
    case RISCV::LW:
    case RISCV::LBU:
    case RISCV::LHU:
    case RISCV::LWU:
    case RISCV::LD:
    case RISCV::FLH:
    case RISCV::FLW:
    case RISCV::FLD:
    case RISCV::SB:
    case RISCV::SH:
    case RISCV::SW:
    case RISCV::SD:
    case RISCV::FSH:
    case RISCV::FSW:
    case RISCV::FSD: {
      if (UseMI.getOperand(1).isFI())
        return false;
      // Register defined by Lo should not be the value register.
      if (DestReg == UseMI.getOperand(0).getReg())
        return false;
      assert(DestReg == UseMI.getOperand(1).getReg() &&
             "Expected base address use");
      // All load/store instructions must use the same offset.
      int64_t Offset = UseMI.getOperand(2).getImm();
      if (CommonOffset && Offset != CommonOffset)
        return false;
      CommonOffset = Offset;
    }
    }
  }

  // We found a common offset.
  // Update the offsets in global address lowering.
  // We may have already folded some arithmetic so we need to add to any
  // existing offset.
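  // e.g. (illustrative): if an earlier fold merged 16572 into Hi and every
  // memory use adds 8, the combined offset becomes 16580.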
  int64_t NewOffset = Hi.getOperand(1).getOffset() + *CommonOffset;
  // RV32 ignores the upper 32 bits.
  if (!ST->is64Bit())
    NewOffset = SignExtend64<32>(NewOffset);
  // We can only fold simm32 offsets.
  if (!isInt<32>(NewOffset))
    return false;

  Hi.getOperand(1).setOffset(NewOffset);
  MachineOperand &ImmOp = Lo.getOperand(2);
  if (Hi.getOpcode() != RISCV::AUIPC)
    ImmOp.setOffset(NewOffset);

  // Update the immediate in the load/store instructions to add the offset.
  for (MachineInstr &UseMI :
       llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
    UseMI.removeOperand(2);
    UseMI.addOperand(ImmOp);
    // Update the base reg in the Tail instruction to feed from LUI.
    // Output of Hi is only used in Lo, no need to use MRI->replaceRegWith().
    UseMI.getOperand(1).setReg(Hi.getOperand(0).getReg());
  }

  Lo.eraseFromParent();
  return true;
}

bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  ST = &Fn.getSubtarget<RISCVSubtarget>();

  bool MadeChange = false;
  MRI = &Fn.getRegInfo();
  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &Hi : MBB) {
      MachineInstr *Lo = nullptr;
      if (!detectFoldable(Hi, Lo))
        continue;
      MadeChange |= detectAndFoldOffset(Hi, *Lo);
      MadeChange |= foldIntoMemoryOps(Hi, *Lo);
    }
  }

  return MadeChange;
}

/// Returns an instance of the Merge Base Offset Optimization pass.
FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() {
  return new RISCVMergeBaseOffsetOpt();
}