//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RISCVMatInt.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
  if (!HasRVC)
    return Res.size();

  int Cost = 0;
  for (auto Instr : Res) {
    bool Compressed;
    switch (Instr.Opc) {
    default: llvm_unreachable("Unexpected opcode");
    case RISCV::SLLI:
    case RISCV::SRLI:
      Compressed = true;
      break;
    case RISCV::ADDI:
    case RISCV::ADDIW:
    case RISCV::LUI:
      Compressed = isInt<6>(Instr.Imm);
      break;
    case RISCV::ADDUW:
      Compressed = false;
      break;
    }
    // Two RVC instructions take the same space as one RVI instruction, but
    // can take longer to execute than the single RVI instruction. Thus, we
    // consider that two RVC instructions are slightly more costly than one
    // RVI instruction. For longer sequences of RVC instructions the space
    // savings can be worth it, though. The costs below try to model that.
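    // For example, on RV32 the two-instruction sequence for 0x1234, LUI 1
    // followed by ADDI 0x234, is costed at 70 + 100 = 170: the LUI immediate
    // fits in 6 bits and is treated as compressible, while the ADDI immediate
    // is not.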
    if (!Compressed)
      Cost += 100; // Baseline cost of one RVI instruction: 100%.
    else
      Cost += 70; // 70% cost of baseline.
  }
  return Cost;
}

// Recursively generate a sequence for materializing an integer.
static void generateInstSeqImpl(int64_t Val,
                                const FeatureBitset &ActiveFeatures,
                                RISCVMatInt::InstSeq &Res) {
  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];

  if (isInt<32>(Val)) {
    // Depending on the active bits in the immediate value v, the following
    // instruction sequences are emitted:
    //
    // v == 0                        : ADDI
    // v[0,12) != 0 && v[12,32) == 0 : ADDI
    // v[0,12) == 0 && v[12,32) != 0 : LUI
    // v[0,32) != 0                  : LUI+ADDI(W)
    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
    int64_t Lo12 = SignExtend64<12>(Val);

    if (Hi20)
      Res.push_back(RISCVMatInt::Inst(RISCV::LUI, Hi20));

    if (Lo12 || Hi20 == 0) {
      unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
      Res.push_back(RISCVMatInt::Inst(AddiOpc, Lo12));
    }
    return;
  }

  assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");

  // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
  // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
  // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
  // while the following ADDI instructions contribute up to 12 bits each.
  //
  // At first glance, implementing this seems to be possible by simply emitting
  // the most significant 32 bits (LUI+ADDIW) followed by as many left shifts
  // (SLLI) and immediate additions (ADDI) as needed. However, because ADDI
  // performs a sign-extended addition, doing it like that would only be
  // possible when at most 11 bits of the ADDI instructions are used. Using
  // all 12 bits of the ADDI instructions, as GAS does, actually requires that
  // the constant is processed starting with the least significant bit.
  //
  // In the following, constants are processed from LSB to MSB but instruction
  // emission is performed from MSB to LSB by recursively calling
  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
  // from the constant and the optimal shift amount, which can be greater than
  // 12 bits if the constant is sparse, is determined. Then, the shifted
  // remaining constant is processed recursively and gets emitted as soon as it
  // fits into 32 bits. The emission of the shifts and additions is subsequently
  // performed when the recursion returns.
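  //
  // For example, materializing Val = 0x123456789 first peels off the low 12
  // bits (0x789), leaving Hi52 = 0x123456, whose lowest set bit is bit 1, so
  // the optimal shift is 13. The recursion then materializes 0x91A2B, which
  // fits into 32 bits, and the shift and addition are emitted on the way back:
  //
  //   LUI   0x92   // 0x92000
  //   ADDIW -1493  // 0x92000 - 0x5D5 = 0x91A2B
  //   SLLI  13     // 0x91A2B << 13   = 0x123456000
  //   ADDI  1929   // + 0x789         = 0x123456789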

  int64_t Lo12 = SignExtend64<12>(Val);
  int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
  int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
  Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);

  // If the remaining bits don't fit in 12 bits, we might be able to reduce the
  // shift amount in order to use LUI which will zero the lower 12 bits.
  if (ShiftAmount > 12 && !isInt<12>(Hi52) && isInt<32>((uint64_t)Hi52 << 12)) {
    // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
    ShiftAmount -= 12;
    Hi52 = (uint64_t)Hi52 << 12;
  }

  generateInstSeqImpl(Hi52, ActiveFeatures, Res);

  Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
  if (Lo12)
    Res.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
}

namespace llvm {
namespace RISCVMatInt {
InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
  RISCVMatInt::InstSeq Res;
  generateInstSeqImpl(Val, ActiveFeatures, Res);

  // If the constant is positive we might be able to generate a shifted constant
  // with no leading zeros and use a final SRLI to restore them.
  if (Val > 0 && Res.size() > 2) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");
    unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
    uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
    // Fill in the bits that will be shifted out with 1s. An example where this
    // helps is trailing one masks with 32 or more ones. This will generate
    // ADDI -1 and an SRLI.
    ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);

    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));

    // Keep the new sequence if it is an improvement.
    if (TmpSeq.size() < Res.size()) {
      Res = TmpSeq;
      // A 2 instruction sequence is the best we can do.
      if (Res.size() <= 2)
        return Res;
    }
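
    // For example, Val = 0x3FFFFFFFF (34 trailing ones) is materialized
    // directly as ADDI 1, SLLI 34, ADDI -1; shifting out its 30 leading zeros
    // and filling the vacated low bits with ones gives -1, so the shorter
    // ADDI -1, SRLI 30 wins here.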

    // Some cases can benefit from filling the lower bits with zeros instead.
    ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
    TmpSeq.clear();
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));

    // Keep the new sequence if it is an improvement.
    if (TmpSeq.size() < Res.size()) {
      Res = TmpSeq;
      // A 2 instruction sequence is the best we can do.
      if (Res.size() <= 2)
        return Res;
    }

    // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
    // the end of the sequence.
    if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureExtZba]) {
      // Try replacing upper bits with 1.
      uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
      TmpSeq.clear();
      generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDUW, 0));

      // Keep the new sequence if it is an improvement.
      if (TmpSeq.size() < Res.size()) {
        Res = TmpSeq;
        // A 2 instruction sequence is the best we can do.
        if (Res.size() <= 2)
          return Res;
      }
    }
  }

  return Res;
}

int getIntMatCost(const APInt &Val, unsigned Size,
                  const FeatureBitset &ActiveFeatures,
                  bool CompressionCost) {
  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
  bool HasRVC = CompressionCost && ActiveFeatures[RISCV::FeatureStdExtC];
  int PlatRegSize = IsRV64 ? 64 : 32;

  // Split the constant into platform register sized chunks, and calculate cost
  // of each chunk.
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
    APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
    InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
    Cost += getInstSeqCost(MatSeq, HasRVC);
  }
  return std::max(1, Cost);
}
} // namespace RISCVMatInt
} // namespace llvm