//===-- RISCVMakeCompressible.cpp - Make more instructions compressible ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass searches for instructions that are prevented from being compressed
// by one of the following:
//
//   1. The use of a single uncompressed register.
//   2. A base register + offset where the offset is too large to be compressed
//   and the base register may or may not be compressed.
//
//
// For case 1, if a compressed register is available, then the uncompressed
// register is copied to the compressed register and its uses are replaced.
//
// For example, storing zero uses the uncompressible zero register:
//   sw zero, 0(a0)   # if zero
//   sw zero, 8(a0)   #   is stored,
//   sw zero, 4(a0)   #   it can't
//   sw zero, 24(a0)  #   be compressed.
//
// If a compressed register (e.g. a1) is available, the above can be transformed
// to the following to improve code size:
//   li a1, 0
//   c.sw a1, 0(a0)
//   c.sw a1, 8(a0)
//   c.sw a1, 4(a0)
//   c.sw a1, 24(a0)
//
//
// For case 2, if a compressed register is available, then the original base
// is copied and adjusted such that:
//
//   new_base_register = base_register + adjustment
//   base_register + large_offset = new_base_register + small_offset
//
// For example, the following offsets are too large for c.sw:
//   lui a2, 983065
//   sw  a1, -236(a2)
//   sw  a1, -240(a2)
//   sw  a1, -244(a2)
//   sw  a1, -248(a2)
//   sw  a1, -252(a2)
//   sw  a0, -256(a2)
//
// If a compressed register is available (e.g. a3), a new base could be created
// such that the addresses can be accessed with a compressible offset, thus
// improving code size:
//   lui a2, 983065
//   addi  a3, a2, -256
//   c.sw  a1, 20(a3)
//   c.sw  a1, 16(a3)
//   c.sw  a1, 12(a3)
//   c.sw  a1, 8(a3)
//   c.sw  a1, 4(a3)
//   c.sw  a0, 0(a3)
//
//
// This optimization is only applied if there are enough uses of the copied
// register for code size to be reduced.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-make-compressible"
#define RISCV_COMPRESS_INSTRS_NAME "RISC-V Make Compressible"

namespace {

struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
  static char ID;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
};
} // namespace

char RISCVMakeCompressibleOpt::ID = 0;
INITIALIZE_PASS(RISCVMakeCompressibleOpt, "riscv-make-compressible",
                RISCV_COMPRESS_INSTRS_NAME, false, false)

// Return log2(widthInBytes) of load/store done by Opcode.
static unsigned log2LdstWidth(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::LBU:
  case RISCV::SB:
    return 0;
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::SH:
  case RISCV::SH_INX:
    return 1;
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FLW:
  case RISCV::FSW:
    return 2;
  case RISCV::LD:
  case RISCV::SD:
  case RISCV::FLD:
  case RISCV::FSD:
    return 3;
  }
}

// Return a mask covering the bits of the (unscaled) offset field in the
// compressed version of Opcode.
static unsigned offsetMask(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::LBU:
  case RISCV::SB:
    return maskTrailingOnes<unsigned>(2U);
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::SH:
  case RISCV::SH_INX:
    return maskTrailingOnes<unsigned>(1U);
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FLW:
  case RISCV::FSW:
  case RISCV::LD:
  case RISCV::SD:
  case RISCV::FLD:
  case RISCV::FSD:
    return maskTrailingOnes<unsigned>(5U);
  }
}

// Return a mask for the offset bits of a non-stack-pointer based compressed
// load/store.
static uint8_t compressedLDSTOffsetMask(unsigned Opcode) {
  return offsetMask(Opcode) << log2LdstWidth(Opcode);
}
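
// As a worked example: c.sw has a 5-bit offset field that is scaled by the
// 4-byte access width, so compressedLDSTOffsetMask(RISCV::SW) is
// 0b11111 << 2 == 0x7c, i.e. it accepts the offsets 0, 4, ..., 124.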

// Return true if Offset fits within a compressed stack-pointer based
// load/store.
static bool compressibleSPOffset(int64_t Offset, unsigned Opcode) {
  // Compressed sp-based loads and stores only work for 32/64 bits.
  switch (log2LdstWidth(Opcode)) {
  case 2:
    return isShiftedUInt<6, 2>(Offset);
  case 3:
    return isShiftedUInt<6, 3>(Offset);
  }
  return false;
}
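
// For reference: c.lwsp/c.swsp encode offset[7:2] (any multiple of 4 in
// [0, 252]) and c.ldsp/c.sdsp encode offset[8:3] (any multiple of 8 in
// [0, 504]), which is exactly what the isShiftedUInt checks above accept.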

// Given an offset for a load/store, return the adjustment required to the base
// register such that the address can be accessed with a compressible offset.
// This will return 0 if the offset is already compressible.
static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
  // Return the excess bits that do not fit in a compressible offset.
  return Offset & ~compressedLDSTOffsetMask(Opcode);
}
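
// Using the sw example from the file header: for "sw a1, -240(a2)" the
// compressible offset mask is 0x7c, so
// getBaseAdjustForCompression(-240, RISCV::SW) == (-240 & ~0x7c) == -256,
// leaving the compressible offset -240 - (-256) == 16 for c.sw.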

// Return true if Reg is in a compressed register class.
static bool isCompressedReg(Register Reg) {
  return RISCV::GPRCRegClass.contains(Reg) ||
         RISCV::GPRF16CRegClass.contains(Reg) ||
         RISCV::GPRF32CRegClass.contains(Reg) ||
         RISCV::FPR32CRegClass.contains(Reg) ||
         RISCV::FPR64CRegClass.contains(Reg);
}
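
// These "C" register classes hold the registers addressable by the 3-bit
// register fields of compressed instructions: x8-x15, and f8-f15 for the
// floating-point classes.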

// Return true if MI is a load for which there exists a compressed version.
static bool isCompressibleLoad(const MachineInstr &MI) {
  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();

  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::LBU:
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
    return STI.hasStdExtZcb();
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::LD:
    return STI.hasStdExtCOrZca();
  case RISCV::FLW:
    return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
  case RISCV::FLD:
    return STI.hasStdExtCOrZcd();
  }
}

// Return true if MI is a store for which there exists a compressed version.
static bool isCompressibleStore(const MachineInstr &MI) {
  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();

  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SH_INX:
    return STI.hasStdExtZcb();
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::SD:
    return STI.hasStdExtCOrZca();
  case RISCV::FSW:
    return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
  case RISCV::FSD:
    return STI.hasStdExtCOrZcd();
  }
}

// Find a single register and/or large offset which, if compressible, would
// allow the given instruction to be compressed.
//
// Possible return values:
//
//   {Reg, 0}               - Uncompressed Reg needs replacing with a compressed
//                            register.
//   {Reg, N}               - Reg needs replacing with a compressed register and
//                            N needs adding to the new register. (Reg may be
//                            compressed or uncompressed).
//   {RISCV::NoRegister, 0} - No suitable optimization found for this
//                            instruction.
static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
  const unsigned Opcode = MI.getOpcode();

  if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
    const MachineOperand &MOImm = MI.getOperand(2);
    if (!MOImm.isImm())
      return RegImmPair(RISCV::NoRegister, 0);

    int64_t Offset = MOImm.getImm();
    int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
    Register Base = MI.getOperand(1).getReg();

    // Memory accesses via the stack pointer do not have a requirement for
    // either of the registers to be compressible and can take a larger offset.
    if (RISCV::SPRegClass.contains(Base)) {
      if (!compressibleSPOffset(Offset, Opcode) && NewBaseAdjust)
        return RegImmPair(Base, NewBaseAdjust);
    } else {
      Register SrcDest = MI.getOperand(0).getReg();
      bool SrcDestCompressed = isCompressedReg(SrcDest);
      bool BaseCompressed = isCompressedReg(Base);

      // If only Base and/or offset prevent compression, then return Base and
      // any adjustment required to make the offset compressible.
      if ((!BaseCompressed || NewBaseAdjust) && SrcDestCompressed)
        return RegImmPair(Base, NewBaseAdjust);

      // For loads, we can only change the base register since dest is defined
      // rather than used.
      //
      // For stores, we can change SrcDest (and Base if SrcDest == Base) but
      // cannot resolve an uncompressible offset in this case.
      if (isCompressibleStore(MI)) {
        if (!SrcDestCompressed && (BaseCompressed || SrcDest == Base) &&
            !NewBaseAdjust)
          return RegImmPair(SrcDest, NewBaseAdjust);
      }
    }
  }
  return RegImmPair(RISCV::NoRegister, 0);
}
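
// Tying this back to the header examples: "sw zero, 0(a0)" yields {zero, 0}
// (only the uncompressible source register blocks compression), while
// "sw a1, -240(a2)" yields {a2, -256} (the offset needs a base adjustment
// before it fits c.sw, even though a1 and a2 are both compressible).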

// Check all uses after FirstMI of the given register, keeping a vector of
// instructions that would be compressible if the given register (and offset if
// applicable) were compressible.
//
// If there are enough uses for this optimization to improve code size and a
// compressed register is available, return that compressed register.
static Register analyzeCompressibleUses(MachineInstr &FirstMI,
                                        RegImmPair RegImm,
                                        SmallVectorImpl<MachineInstr *> &MIs) {
  MachineBasicBlock &MBB = *FirstMI.getParent();
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();

  for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(),
                                         E = MBB.instr_end();
       I != E; ++I) {
    MachineInstr &MI = *I;

    // Determine if this is an instruction which would benefit from using the
    // new register.
    RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI);
    if (CandidateRegImm.Reg == RegImm.Reg && CandidateRegImm.Imm == RegImm.Imm)
      MIs.push_back(&MI);

    // If RegImm.Reg is modified by this instruction, then we cannot optimize
    // past this instruction. If the register is already compressed, then it
    // may be possible to optimize a large offset in the current instruction -
    // this will have been detected by the preceding call to
    // getRegImmPairPreventingCompression.
    if (MI.modifiesRegister(RegImm.Reg, TRI))
      break;
  }

  // Adjusting the base costs one new uncompressed addi and therefore three uses
  // are required for a code size reduction. If no base adjustment is required,
  // then copying the register costs one new c.mv (or c.li Rd, 0 for "copying"
  // the zero register) and therefore two uses are required for a code size
  // reduction.
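  // In byte terms: each converted use saves 2 bytes, so the 4-byte addi pays
  // for itself at three uses (6 > 4) and the 2-byte c.mv/c.li at two uses
  // (4 > 2).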
  if (MIs.size() < 2 || (RegImm.Imm != 0 && MIs.size() < 3))
    return RISCV::NoRegister;

  // Find a compressible register which will be available from the first
  // instruction we care about to the last.
  const TargetRegisterClass *RCToScavenge;

  // Work out the compressed register class from which to scavenge.
  if (RISCV::GPRRegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRCRegClass;
  else if (RISCV::GPRF16RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRF16CRegClass;
  else if (RISCV::GPRF32RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRF32CRegClass;
  else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::FPR32CRegClass;
  else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::FPR64CRegClass;
  else
    return RISCV::NoRegister;

  RegScavenger RS;
  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MIs.back()->getIterator()));
  return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
                                      /*RestoreAfter=*/false, /*SPAdj=*/0,
                                      /*AllowSpill=*/false);
}

// Update uses of the old register in the given instruction to the new
// register.
static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
                           Register NewReg) {
  unsigned Opcode = MI.getOpcode();

  // If this pass is extended to support more instructions, the check for
  // definedness may need to be strengthened.
  assert((isCompressibleLoad(MI) || isCompressibleStore(MI)) &&
         "Unsupported instruction for this optimization.");

  int SkipN = 0;

  // Skip the first (value) operand to a store instruction (except if the store
  // offset is zero) in order to avoid an incorrect transformation.
  // e.g. sd a0, 808(a0) to addi a2, a0, 768; sd a2, 40(a2)
  if (isCompressibleStore(MI) && OldRegImm.Imm != 0)
    SkipN = 1;

  // Update registers
  for (MachineOperand &MO : drop_begin(MI.operands(), SkipN))
    if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
      // Do not update operands that define the old register.
      //
      // The new register was scavenged for the range of instructions that are
      // being updated, therefore it should not be defined within this range
      // except possibly in the final instruction.
      if (MO.isDef()) {
        assert(isCompressibleLoad(MI));
        continue;
      }
      // Update reg
      MO.setReg(NewReg);
    }

  // Update offset
  MachineOperand &MOImm = MI.getOperand(2);
  int64_t NewOffset = MOImm.getImm() & compressedLDSTOffsetMask(Opcode);
  MOImm.setImm(NewOffset);
}

bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
  // This is a size optimization.
  if (skipFunction(Fn.getFunction()) || !Fn.getFunction().hasMinSize())
    return false;

  const RISCVSubtarget &STI = Fn.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo &TII = *STI.getInstrInfo();

  // This optimization only makes sense if compressed instructions are emitted.
  if (!STI.hasStdExtCOrZca())
    return false;

  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &MI : MBB) {
      // Determine if this instruction would otherwise be compressed if not for
      // an uncompressible register or offset.
      RegImmPair RegImm = getRegImmPairPreventingCompression(MI);
      if (!RegImm.Reg && RegImm.Imm == 0)
        continue;

      // Determine if there is a set of instructions for which replacing this
      // register with a compressed register (and compressible offset if
      // applicable) is possible and will allow compression.
      SmallVector<MachineInstr *, 8> MIs;
      Register NewReg = analyzeCompressibleUses(MI, RegImm, MIs);
      if (!NewReg)
        continue;

      // Create the appropriate copy and/or offset.
      if (RISCV::GPRRegClass.contains(RegImm.Reg)) {
        assert(isInt<12>(RegImm.Imm));
        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg)
            .addReg(RegImm.Reg)
            .addImm(RegImm.Imm);
      } else if (RISCV::GPRF16RegClass.contains(RegImm.Reg)) {
        assert(RegImm.Imm == 0);
        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR16INX),
                NewReg)
            .addReg(RegImm.Reg);
      } else if (RISCV::GPRF32RegClass.contains(RegImm.Reg)) {
        assert(RegImm.Imm == 0);
        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR32INX),
                NewReg)
            .addReg(RegImm.Reg);
      } else {
        // If we are looking at replacing an FPR register we don't expect to
        // have any offset. The only compressible FP instructions with an offset
        // are loads and stores, for which the offset applies to the GPR operand
        // not the FPR operand.
        assert(RegImm.Imm == 0);
        unsigned Opcode = RISCV::FPR32RegClass.contains(RegImm.Reg)
                              ? RISCV::FSGNJ_S
                              : RISCV::FSGNJ_D;
        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(Opcode), NewReg)
            .addReg(RegImm.Reg)
            .addReg(RegImm.Reg);
      }

      // Update the set of instructions to use the compressed register and
      // compressible offset instead. These instructions should now be
      // compressible.
      // TODO: Update all uses if RegImm.Imm == 0? Not just those that are
      // expected to become compressible.
      for (MachineInstr *UpdateMI : MIs)
        updateOperands(*UpdateMI, RegImm, NewReg);
    }
  }
  return true;
}

/// Returns an instance of the Make Compressible Optimization pass.
FunctionPass *llvm::createRISCVMakeCompressibleOptPass() {
  return new RISCVMakeCompressibleOpt();
}