//===-- RISCVMakeCompressible.cpp - Make more instructions compressible --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass searches for instructions that are prevented from being compressed
// by one of the following:
//
// 1. The use of a single uncompressed register.
// 2. A base register + offset where the offset is too large to be compressed
//    and the base register may or may not be compressed.
//
//
// For case 1, if a compressed register is available, then the uncompressed
// register is copied to the compressed register and its uses are replaced.
//
// For example, storing zero uses the uncompressible zero register:
//   sw zero, 0(a0)
//   sw zero, 8(a0)
//   sw zero, 4(a0)
//   sw zero, 24(a0)
//
// If a compressed register (e.g. a1) is available, the above can be
// transformed to the following to improve code size:
//   li a1, 0
//   c.sw a1, 0(a0)
//   c.sw a1, 8(a0)
//   c.sw a1, 4(a0)
//   c.sw a1, 24(a0)
//
//
// For case 2, if a compressed register is available, then the original base
// is copied and adjusted such that:
//
//   new_base_register = base_register + adjustment
//   base_register + large_offset = new_base_register + small_offset
//
// For example, the following offsets are too large for c.sw:
//   lui a2, 983065
//   sw  a1, -236(a2)
//   sw  a1, -240(a2)
//   sw  a1, -244(a2)
//   sw  a1, -248(a2)
//   sw  a1, -252(a2)
//   sw  a0, -256(a2)
//
// If a compressed register is available (e.g. a3), a new base could be created
// such that the addresses can be accessed with a compressible offset, thus
// improving code size:
//   lui a2, 983065
//   addi a3, a2, -256
//   c.sw a1, 20(a3)
//   c.sw a1, 16(a3)
//   c.sw a1, 12(a3)
//   c.sw a1, 8(a3)
//   c.sw a1, 4(a3)
//   c.sw a0, 0(a3)
//
//
// This optimization is only applied if there are enough uses of the copied
// register for code size to be reduced.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-make-compressible"
#define RISCV_COMPRESS_INSTRS_NAME "RISC-V Make Compressible"

namespace {

struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
  static char ID;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
};
} // namespace

char RISCVMakeCompressibleOpt::ID = 0;
INITIALIZE_PASS(RISCVMakeCompressibleOpt, "riscv-make-compressible",
                RISCV_COMPRESS_INSTRS_NAME, false, false)
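
// Note: since the pass is registered above under the name
// "riscv-make-compressible", it can be exercised in isolation on MIR input,
// e.g. (a sketch of a typical MIR test invocation; exact flags depend on the
// test):
//   llc -mtriple=riscv32 -mattr=+c -run-pass=riscv-make-compressible \
//       -o - input.mir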
// Return log2(widthInBytes) of load/store done by Opcode.
static unsigned log2LdstWidth(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::LBU:
  case RISCV::SB:
    return 0;
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::SH:
  case RISCV::SH_INX:
    return 1;
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FLW:
  case RISCV::FSW:
    return 2;
  case RISCV::LD:
  case RISCV::SD:
  case RISCV::FLD:
  case RISCV::FSD:
    return 3;
  }
}

// Return a mask covering the bits of the immediate field in the compressed
// form of Opcode (before scaling by the access width).
static unsigned offsetMask(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::LBU:
  case RISCV::SB:
    return maskTrailingOnes<unsigned>(2U);
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::SH:
  case RISCV::SH_INX:
    return maskTrailingOnes<unsigned>(1U);
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FLW:
  case RISCV::FSW:
  case RISCV::LD:
  case RISCV::SD:
  case RISCV::FLD:
  case RISCV::FSD:
    return maskTrailingOnes<unsigned>(5U);
  }
}

// Return a mask for the offset bits of a non-stack-pointer based compressed
// load/store.
static uint8_t compressedLDSTOffsetMask(unsigned Opcode) {
  return offsetMask(Opcode) << log2LdstWidth(Opcode);
}

// Return true if Offset fits within a compressed stack-pointer based
// load/store.
static bool compressibleSPOffset(int64_t Offset, unsigned Opcode) {
  // Compressed sp-based loads and stores only work for 32/64 bits.
  switch (log2LdstWidth(Opcode)) {
  case 2:
    return isShiftedUInt<6, 2>(Offset);
  case 3:
    return isShiftedUInt<6, 3>(Offset);
  }
  return false;
}

// Given an offset for a load/store, return the adjustment required to the base
// register such that the address can be accessed with a compressible offset.
// This will return 0 if the offset is already compressible.
static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
  // Return the excess bits that do not fit in a compressible offset.
  return Offset & ~compressedLDSTOffsetMask(Opcode);
}
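
// A worked example of the helpers above (illustration only): for c.sw,
// offsetMask is 0b11111 and log2LdstWidth is 2, so compressedLDSTOffsetMask
// is 0b1111100 = 0x7c, i.e. byte offsets 0..124 in multiples of 4. The
// header's case 2 example then splits as:
//   -236 & ~0x7c = -256  (base adjustment, getBaseAdjustForCompression)
//   -236 &  0x7c =   20  (the new, compressible offset)
// Every offset from -236 down to -256 produces the same -256 adjustment,
// which is what lets all six stores share a single new base register.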
// Return true if Reg is in a compressed register class.
static bool isCompressedReg(Register Reg) {
  return RISCV::GPRCRegClass.contains(Reg) ||
         RISCV::GPRF16CRegClass.contains(Reg) ||
         RISCV::GPRF32CRegClass.contains(Reg) ||
         RISCV::FPR32CRegClass.contains(Reg) ||
         RISCV::FPR64CRegClass.contains(Reg);
}

// Return true if MI is a load for which there exists a compressed version.
static bool isCompressibleLoad(const MachineInstr &MI) {
  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();

  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::LBU:
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
    return STI.hasStdExtZcb();
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::LD:
    return STI.hasStdExtCOrZca();
  case RISCV::FLW:
    return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
  case RISCV::FLD:
    return STI.hasStdExtCOrZcd();
  }
}

// Return true if MI is a store for which there exists a compressed version.
static bool isCompressibleStore(const MachineInstr &MI) {
  const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();

  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SH_INX:
    return STI.hasStdExtZcb();
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::SD:
    return STI.hasStdExtCOrZca();
  case RISCV::FSW:
    return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
  case RISCV::FSD:
    return STI.hasStdExtCOrZcd();
  }
}

// Find a single register and/or large offset which, if compressible, would
// allow the given instruction to be compressed.
//
// Possible return values:
//
//   {Reg, 0}               - Uncompressed Reg needs replacing with a
//                            compressed register.
//   {Reg, N}               - Reg needs replacing with a compressed register
//                            and N needs adding to the new register. (Reg may
//                            be compressed or uncompressed).
//   {RISCV::NoRegister, 0} - No suitable optimization found for this
//                            instruction.
static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
  const unsigned Opcode = MI.getOpcode();

  if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
    const MachineOperand &MOImm = MI.getOperand(2);
    if (!MOImm.isImm())
      return RegImmPair(RISCV::NoRegister, 0);

    int64_t Offset = MOImm.getImm();
    int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
    Register Base = MI.getOperand(1).getReg();

    // Memory accesses via the stack pointer do not have a requirement for
    // either of the registers to be compressible and can take a larger offset.
    if (RISCV::SPRegClass.contains(Base)) {
      if (!compressibleSPOffset(Offset, Opcode) && NewBaseAdjust)
        return RegImmPair(Base, NewBaseAdjust);
    } else {
      Register SrcDest = MI.getOperand(0).getReg();
      bool SrcDestCompressed = isCompressedReg(SrcDest);
      bool BaseCompressed = isCompressedReg(Base);

      // If only Base and/or offset prevent compression, then return Base and
      // any adjustment required to make the offset compressible.
      if ((!BaseCompressed || NewBaseAdjust) && SrcDestCompressed)
        return RegImmPair(Base, NewBaseAdjust);

      // For loads, we can only change the base register since dest is defined
      // rather than used.
      //
      // For stores, we can change SrcDest (and Base if SrcDest == Base) but
      // cannot resolve an uncompressible offset in this case.
      if (isCompressibleStore(MI)) {
        if (!SrcDestCompressed && (BaseCompressed || SrcDest == Base) &&
            !NewBaseAdjust)
          return RegImmPair(SrcDest, NewBaseAdjust);
      }
    }
  }
  return RegImmPair(RISCV::NoRegister, 0);
}
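
// For illustration, applied to the header's examples,
// getRegImmPairPreventingCompression returns:
//   sw zero, 0(a0)  -> {zero, 0}  (base and offset are fine, but the stored
//                                  value zero is not a compressed register)
//   sw a1, -236(a2) -> {a2, -256} (both registers are compressed, but the
//                                  offset requires adjusting the base by -256)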
// Check all uses after FirstMI of the given register, keeping a vector of
// instructions that would be compressible if the given register (and offset if
// applicable) were compressible.
//
// If there are enough uses for this optimization to improve code size and a
// compressed register is available, return that compressed register.
static Register analyzeCompressibleUses(MachineInstr &FirstMI,
                                        RegImmPair RegImm,
                                        SmallVectorImpl<MachineInstr *> &MIs) {
  MachineBasicBlock &MBB = *FirstMI.getParent();
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();

  for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(),
                                         E = MBB.instr_end();
       I != E; ++I) {
    MachineInstr &MI = *I;

    // Determine if this is an instruction which would benefit from using the
    // new register.
    RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI);
    if (CandidateRegImm.Reg == RegImm.Reg && CandidateRegImm.Imm == RegImm.Imm)
      MIs.push_back(&MI);

    // If RegImm.Reg is modified by this instruction, then we cannot optimize
    // past this instruction. If the register is already compressed, then it
    // may be possible to optimize a large offset in the current instruction -
    // this will have been detected by the preceding call to
    // getRegImmPairPreventingCompression.
    if (MI.modifiesRegister(RegImm.Reg, TRI))
      break;
  }

  // Adjusting the base costs one new uncompressed addi and therefore three
  // uses are required for a code size reduction. If no base adjustment is
  // required, then copying the register costs one new c.mv (or c.li Rd, 0 for
  // "copying" the zero register) and therefore two uses are required for a
  // code size reduction.
  if (MIs.size() < 2 || (RegImm.Imm != 0 && MIs.size() < 3))
    return RISCV::NoRegister;

  // Find a compressible register which will be available from the first
  // instruction we care about to the last.
  const TargetRegisterClass *RCToScavenge;

  // Work out the compressed register class from which to scavenge.
  if (RISCV::GPRRegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRCRegClass;
  else if (RISCV::GPRF16RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRF16CRegClass;
  else if (RISCV::GPRF32RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::GPRF32CRegClass;
  else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::FPR32CRegClass;
  else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
    RCToScavenge = &RISCV::FPR64CRegClass;
  else
    return RISCV::NoRegister;

  RegScavenger RS;
  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MIs.back()->getIterator()));
  return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
                                      /*RestoreAfter=*/false, /*SPAdj=*/0,
                                      /*AllowSpill=*/false);
}
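
// The byte arithmetic behind the thresholds above (illustration only): each
// rewritten load/store shrinks from 4 to 2 bytes, saving 2 bytes per use. A
// c.mv/c.li copy costs 2 bytes, so two uses already save 4 - 2 = 2 bytes net.
// An uncompressed addi costs 4 bytes, so three uses are needed to save
// 6 - 4 = 2 bytes net (two uses would only break even).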
// Update uses of the old register in the given instruction to the new
// register.
static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
                           Register NewReg) {
  unsigned Opcode = MI.getOpcode();

  // If this pass is extended to support more instructions, the check for
  // definedness may need to be strengthened.
  assert((isCompressibleLoad(MI) || isCompressibleStore(MI)) &&
         "Unsupported instruction for this optimization.");

  int SkipN = 0;

  // Skip the first (value) operand to a store instruction (except if the store
  // offset is zero) in order to avoid an incorrect transformation.
  // e.g. sd a0, 808(a0) must not become addi a2, a0, 768; sd a2, 40(a2) -
  // the stored value has to remain a0: sd a0, 40(a2).
  if (isCompressibleStore(MI) && OldRegImm.Imm != 0)
    SkipN = 1;

  // Update registers
  for (MachineOperand &MO : drop_begin(MI.operands(), SkipN))
    if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
      // Do not update operands that define the old register.
      //
      // The new register was scavenged for the range of instructions that are
      // being updated, therefore it should not be defined within this range
      // except possibly in the final instruction.
      if (MO.isDef()) {
        assert(isCompressibleLoad(MI));
        continue;
      }
      // Update reg
      MO.setReg(NewReg);
    }

  // Update offset
  MachineOperand &MOImm = MI.getOperand(2);
  int64_t NewOffset = MOImm.getImm() & compressedLDSTOffsetMask(Opcode);
  MOImm.setImm(NewOffset);
}
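
// Putting updateOperands together with the header's case 2 example (an
// illustration, assuming OldRegImm is {a2, -256} and the scavenged NewReg is
// a3):
//   sw a1, -236(a2)  becomes  sw a1, 20(a3)
// Note the opcode stays the uncompressed SW: this pass only arranges for the
// operands to satisfy the constraints of c.sw; the 16-bit encoding itself is
// selected later, when the instruction is compressed for emission.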
bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
  // This is a size optimization.
  if (skipFunction(Fn.getFunction()) || !Fn.getFunction().hasMinSize())
    return false;

  const RISCVSubtarget &STI = Fn.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo &TII = *STI.getInstrInfo();

  // This optimization only makes sense if compressed instructions are emitted.
  if (!STI.hasStdExtCOrZca())
    return false;

  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &MI : MBB) {
      // Determine if this instruction would otherwise be compressed if not for
      // an uncompressible register or offset.
      RegImmPair RegImm = getRegImmPairPreventingCompression(MI);
      if (!RegImm.Reg && RegImm.Imm == 0)
        continue;

      // Determine if there is a set of instructions for which replacing this
      // register with a compressed register (and compressible offset if
      // applicable) is possible and will allow compression.
      SmallVector<MachineInstr *, 8> MIs;
      Register NewReg = analyzeCompressibleUses(MI, RegImm, MIs);
      if (!NewReg)
        continue;

      // Create the appropriate copy and/or offset.
      if (RISCV::GPRRegClass.contains(RegImm.Reg)) {
        assert(isInt<12>(RegImm.Imm));
        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg)
            .addReg(RegImm.Reg)
            .addImm(RegImm.Imm);
      } else if (RISCV::GPRF16RegClass.contains(RegImm.Reg)) {
        assert(RegImm.Imm == 0);
        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR16INX),
                NewReg)
            .addReg(RegImm.Reg);
      } else if (RISCV::GPRF32RegClass.contains(RegImm.Reg)) {
        assert(RegImm.Imm == 0);
        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR32INX),
                NewReg)
            .addReg(RegImm.Reg);
      } else {
        // If we are looking at replacing an FPR register we don't expect to
        // have any offset. The only compressible FP instructions with an
        // offset are loads and stores, for which the offset applies to the GPR
        // operand not the FPR operand. The FPR copy is a sign-injection of the
        // register onto itself (fsgnj.s/fsgnj.d rd, rs, rs, i.e. the canonical
        // fmv.s/fmv.d register move).
        assert(RegImm.Imm == 0);
        unsigned Opcode = RISCV::FPR32RegClass.contains(RegImm.Reg)
                              ? RISCV::FSGNJ_S
                              : RISCV::FSGNJ_D;
        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(Opcode), NewReg)
            .addReg(RegImm.Reg)
            .addReg(RegImm.Reg);
      }

      // Update the set of instructions to use the compressed register and
      // compressible offset instead. These instructions should now be
      // compressible.
      // TODO: Update all uses if RegImm.Imm == 0? Not just those that are
      // expected to become compressible.
      for (MachineInstr *UpdateMI : MIs)
        updateOperands(*UpdateMI, RegImm, NewReg);
    }
  }
  return true;
}

/// Returns an instance of the Make Compressible Optimization pass.
FunctionPass *llvm::createRISCVMakeCompressibleOptPass() {
  return new RISCVMakeCompressibleOpt();
}