1387d3c24SCraig Topper //===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===// 2387d3c24SCraig Topper // 3387d3c24SCraig Topper // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4387d3c24SCraig Topper // See https://llvm.org/LICENSE.txt for license information. 5387d3c24SCraig Topper // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6387d3c24SCraig Topper // 7387d3c24SCraig Topper //===----------------------------------------------------------------------===// 8387d3c24SCraig Topper 9387d3c24SCraig Topper #include "RISCVMatInt.h" 10387d3c24SCraig Topper #include "MCTargetDesc/RISCVMCTargetDesc.h" 11387d3c24SCraig Topper #include "llvm/ADT/APInt.h" 12d2f8ba7dSSacha Coppey #include "llvm/MC/MCInstBuilder.h" 13387d3c24SCraig Topper #include "llvm/Support/MathExtras.h" 14d61b40edSCraig Topper using namespace llvm; 15387d3c24SCraig Topper 1681efb825SCraig Topper static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) { 1781efb825SCraig Topper if (!HasRVC) 1881efb825SCraig Topper return Res.size(); 1981efb825SCraig Topper 2081efb825SCraig Topper int Cost = 0; 2181efb825SCraig Topper for (auto Instr : Res) { 22186d5c8aSCraig Topper // Assume instructions that aren't listed aren't compressible. 23186d5c8aSCraig Topper bool Compressed = false; 24f2ffdbebSCraig Topper switch (Instr.getOpcode()) { 2581efb825SCraig Topper case RISCV::SLLI: 2681efb825SCraig Topper case RISCV::SRLI: 2781efb825SCraig Topper Compressed = true; 2881efb825SCraig Topper break; 2981efb825SCraig Topper case RISCV::ADDI: 3081efb825SCraig Topper case RISCV::ADDIW: 3181efb825SCraig Topper case RISCV::LUI: 32f2ffdbebSCraig Topper Compressed = isInt<6>(Instr.getImm()); 3381efb825SCraig Topper break; 3481efb825SCraig Topper } 3581efb825SCraig Topper // Two RVC instructions take the same space as one RVI instruction, but 3681efb825SCraig Topper // can take longer to execute than the single RVI instruction. Thus, we 3781efb825SCraig Topper // consider that two RVC instruction are slightly more costly than one 3881efb825SCraig Topper // RVI instruction. For longer sequences of RVC instructions the space 3981efb825SCraig Topper // savings can be worth it, though. The costs below try to model that. 4081efb825SCraig Topper if (!Compressed) 4181efb825SCraig Topper Cost += 100; // Baseline cost of one RVI instruction: 100%. 4281efb825SCraig Topper else 4381efb825SCraig Topper Cost += 70; // 70% cost of baseline. 4481efb825SCraig Topper } 4581efb825SCraig Topper return Cost; 4681efb825SCraig Topper } 4781efb825SCraig Topper 48d61b40edSCraig Topper // Recursively generate a sequence for materializing an integer. 49e179b125SWang Pengcheng static void generateInstSeqImpl(int64_t Val, const MCSubtargetInfo &STI, 502c52d516SCraig Topper RISCVMatInt::InstSeq &Res) { 51e179b125SWang Pengcheng bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit); 524dbb7880SCraig Topper 53564e09c7SCraig Topper // Use BSETI for a single bit that can't be expressed by a single LUI or ADDI. 54e179b125SWang Pengcheng if (STI.hasFeature(RISCV::FeatureStdExtZbs) && isPowerOf2_64(Val) && 55564e09c7SCraig Topper (!isInt<32>(Val) || Val == 0x800)) { 56564e09c7SCraig Topper Res.emplace_back(RISCV::BSETI, Log2_64(Val)); 57564e09c7SCraig Topper return; 58564e09c7SCraig Topper } 59564e09c7SCraig Topper 60387d3c24SCraig Topper if (isInt<32>(Val)) { 61387d3c24SCraig Topper // Depending on the active bits in the immediate Value v, the following 62387d3c24SCraig Topper // instruction sequences are emitted: 63387d3c24SCraig Topper // 64387d3c24SCraig Topper // v == 0 : ADDI 65387d3c24SCraig Topper // v[0,12) != 0 && v[12,32) == 0 : ADDI 66387d3c24SCraig Topper // v[0,12) == 0 && v[12,32) != 0 : LUI 67387d3c24SCraig Topper // v[0,32) != 0 : LUI+ADDI(W) 6803bc33c8SCraig Topper int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF; 69387d3c24SCraig Topper int64_t Lo12 = SignExtend64<12>(Val); 70387d3c24SCraig Topper 71387d3c24SCraig Topper if (Hi20) 72c8c1d7afSCraig Topper Res.emplace_back(RISCV::LUI, Hi20); 73387d3c24SCraig Topper 74387d3c24SCraig Topper if (Lo12 || Hi20 == 0) { 75387d3c24SCraig Topper unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI; 76c8c1d7afSCraig Topper Res.emplace_back(AddiOpc, Lo12); 77387d3c24SCraig Topper } 782c52d516SCraig Topper return; 79387d3c24SCraig Topper } 80387d3c24SCraig Topper 81387d3c24SCraig Topper assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target"); 82387d3c24SCraig Topper 83387d3c24SCraig Topper // In the worst case, for a full 64-bit constant, a sequence of 8 instructions 841104e325SAlexander Pivovarov // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note 85387d3c24SCraig Topper // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits 86387d3c24SCraig Topper // while the following ADDI instructions contribute up to 12 bits each. 87387d3c24SCraig Topper // 88387d3c24SCraig Topper // On the first glance, implementing this seems to be possible by simply 89387d3c24SCraig Topper // emitting the most significant 32 bits (LUI+ADDIW) followed by as many left 90387d3c24SCraig Topper // shift (SLLI) and immediate additions (ADDI) as needed. However, due to the 91387d3c24SCraig Topper // fact that ADDI performs a sign extended addition, doing it like that would 92387d3c24SCraig Topper // only be possible when at most 11 bits of the ADDI instructions are used. 93387d3c24SCraig Topper // Using all 12 bits of the ADDI instructions, like done by GAS, actually 94387d3c24SCraig Topper // requires that the constant is processed starting with the least significant 95387d3c24SCraig Topper // bit. 96387d3c24SCraig Topper // 97387d3c24SCraig Topper // In the following, constants are processed from LSB to MSB but instruction 98387d3c24SCraig Topper // emission is performed from MSB to LSB by recursively calling 99387d3c24SCraig Topper // generateInstSeq. In each recursion, first the lowest 12 bits are removed 100387d3c24SCraig Topper // from the constant and the optimal shift amount, which can be greater than 101387d3c24SCraig Topper // 12 bits if the constant is sparse, is determined. Then, the shifted 102387d3c24SCraig Topper // remaining constant is processed recursively and gets emitted as soon as it 103387d3c24SCraig Topper // fits into 32 bits. The emission of the shifts and additions is subsequently 104387d3c24SCraig Topper // performed when the recursion returns. 105387d3c24SCraig Topper 106387d3c24SCraig Topper int64_t Lo12 = SignExtend64<12>(Val); 1075c383731SCraig Topper Val = (uint64_t)Val - (uint64_t)Lo12; 1085c383731SCraig Topper 1095c383731SCraig Topper int ShiftAmount = 0; 1105c383731SCraig Topper bool Unsigned = false; 1115c383731SCraig Topper 1125c383731SCraig Topper // Val might now be valid for LUI without needing a shift. 1135c383731SCraig Topper if (!isInt<32>(Val)) { 114b3af04f8SKazu Hirata ShiftAmount = llvm::countr_zero((uint64_t)Val); 1155c383731SCraig Topper Val >>= ShiftAmount; 116387d3c24SCraig Topper 117f0e79db2SCraig Topper // If the remaining bits don't fit in 12 bits, we might be able to reduce 11891445532SEvgenii Stepanov // the // shift amount in order to use LUI which will zero the lower 12 11991445532SEvgenii Stepanov // bits. 1205c383731SCraig Topper if (ShiftAmount > 12 && !isInt<12>(Val)) { 12191445532SEvgenii Stepanov if (isInt<32>((uint64_t)Val << 12)) { 122f0e79db2SCraig Topper // Reduce the shift amount and add zeros to the LSBs so it will match 123f0e79db2SCraig Topper // LUI. 12481efb825SCraig Topper ShiftAmount -= 12; 12591445532SEvgenii Stepanov Val = (uint64_t)Val << 12; 12691445532SEvgenii Stepanov } else if (isUInt<32>((uint64_t)Val << 12) && 127e179b125SWang Pengcheng STI.hasFeature(RISCV::FeatureStdExtZba)) { 128481db13fSBen Shi // Reduce the shift amount and add zeros to the LSBs so it will match 129481db13fSBen Shi // LUI, then shift left with SLLI.UW to clear the upper 32 set bits. 130481db13fSBen Shi ShiftAmount -= 12; 13191445532SEvgenii Stepanov Val = ((uint64_t)Val << 12) | (0xffffffffull << 32); 132481db13fSBen Shi Unsigned = true; 133481db13fSBen Shi } 13481efb825SCraig Topper } 13581efb825SCraig Topper 1365c383731SCraig Topper // Try to use SLLI_UW for Val when it is uint32 but not int32. 13791445532SEvgenii Stepanov if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) && 138e179b125SWang Pengcheng STI.hasFeature(RISCV::FeatureStdExtZba)) { 13991445532SEvgenii Stepanov // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with 140588f121aSAlex Bradbury // SLLI_UW. 14191445532SEvgenii Stepanov Val = ((uint64_t)Val) | (0xffffffffull << 32); 14297e52e1cSBen Shi Unsigned = true; 14397e52e1cSBen Shi } 1445c383731SCraig Topper } 14597e52e1cSBen Shi 146e179b125SWang Pengcheng generateInstSeqImpl(Val, STI, Res); 147387d3c24SCraig Topper 1485c383731SCraig Topper // Skip shift if we were able to use LUI directly. 1495c383731SCraig Topper if (ShiftAmount) { 150c8c1d7afSCraig Topper unsigned Opc = Unsigned ? RISCV::SLLI_UW : RISCV::SLLI; 151c8c1d7afSCraig Topper Res.emplace_back(Opc, ShiftAmount); 1525c383731SCraig Topper } 1535c383731SCraig Topper 154387d3c24SCraig Topper if (Lo12) 155c8c1d7afSCraig Topper Res.emplace_back(RISCV::ADDI, Lo12); 156d61b40edSCraig Topper } 157d61b40edSCraig Topper 158af931a51SBaoshan Pang static unsigned extractRotateInfo(int64_t Val) { 159af931a51SBaoshan Pang // for case: 0b111..1..xxxxxx1..1.. 160e0782018SKazu Hirata unsigned LeadingOnes = llvm::countl_one((uint64_t)Val); 161e0782018SKazu Hirata unsigned TrailingOnes = llvm::countr_one((uint64_t)Val); 162af931a51SBaoshan Pang if (TrailingOnes > 0 && TrailingOnes < 64 && 163af931a51SBaoshan Pang (LeadingOnes + TrailingOnes) > (64 - 12)) 164af931a51SBaoshan Pang return 64 - TrailingOnes; 165af931a51SBaoshan Pang 166af931a51SBaoshan Pang // for case: 0bxxx1..1..1...xxx 167e0782018SKazu Hirata unsigned UpperTrailingOnes = llvm::countr_one(Hi_32(Val)); 168e0782018SKazu Hirata unsigned LowerLeadingOnes = llvm::countl_one(Lo_32(Val)); 169af931a51SBaoshan Pang if (UpperTrailingOnes < 32 && 170af931a51SBaoshan Pang (UpperTrailingOnes + LowerLeadingOnes) > (64 - 12)) 171af931a51SBaoshan Pang return 32 - UpperTrailingOnes; 172af931a51SBaoshan Pang 173af931a51SBaoshan Pang return 0; 174af931a51SBaoshan Pang } 175af931a51SBaoshan Pang 176e179b125SWang Pengcheng static void generateInstSeqLeadingZeros(int64_t Val, const MCSubtargetInfo &STI, 177cbd45961SCraig Topper RISCVMatInt::InstSeq &Res) { 178cbd45961SCraig Topper assert(Val > 0 && "Expected postive val"); 179cbd45961SCraig Topper 180cbd45961SCraig Topper unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val); 181cbd45961SCraig Topper uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros; 182cbd45961SCraig Topper // Fill in the bits that will be shifted out with 1s. An example where this 183cbd45961SCraig Topper // helps is trailing one masks with 32 or more ones. This will generate 184cbd45961SCraig Topper // ADDI -1 and an SRLI. 185cbd45961SCraig Topper ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros); 186cbd45961SCraig Topper 187cbd45961SCraig Topper RISCVMatInt::InstSeq TmpSeq; 188e179b125SWang Pengcheng generateInstSeqImpl(ShiftedVal, STI, TmpSeq); 189cbd45961SCraig Topper 190cbd45961SCraig Topper // Keep the new sequence if it is an improvement or the original is empty. 191cbd45961SCraig Topper if ((TmpSeq.size() + 1) < Res.size() || 192cbd45961SCraig Topper (Res.empty() && TmpSeq.size() < 8)) { 193cbd45961SCraig Topper TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros); 194cbd45961SCraig Topper Res = TmpSeq; 195cbd45961SCraig Topper } 196cbd45961SCraig Topper 197cbd45961SCraig Topper // Some cases can benefit from filling the lower bits with zeros instead. 198cbd45961SCraig Topper ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros); 199cbd45961SCraig Topper TmpSeq.clear(); 200e179b125SWang Pengcheng generateInstSeqImpl(ShiftedVal, STI, TmpSeq); 201cbd45961SCraig Topper 202cbd45961SCraig Topper // Keep the new sequence if it is an improvement or the original is empty. 203cbd45961SCraig Topper if ((TmpSeq.size() + 1) < Res.size() || 204cbd45961SCraig Topper (Res.empty() && TmpSeq.size() < 8)) { 205cbd45961SCraig Topper TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros); 206cbd45961SCraig Topper Res = TmpSeq; 207cbd45961SCraig Topper } 208cbd45961SCraig Topper 209cbd45961SCraig Topper // If we have exactly 32 leading zeros and Zba, we can try using zext.w at 210cbd45961SCraig Topper // the end of the sequence. 211e179b125SWang Pengcheng if (LeadingZeros == 32 && STI.hasFeature(RISCV::FeatureStdExtZba)) { 212cbd45961SCraig Topper // Try replacing upper bits with 1. 213cbd45961SCraig Topper uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros); 214cbd45961SCraig Topper TmpSeq.clear(); 215e179b125SWang Pengcheng generateInstSeqImpl(LeadingOnesVal, STI, TmpSeq); 216cbd45961SCraig Topper 217cbd45961SCraig Topper // Keep the new sequence if it is an improvement. 218cbd45961SCraig Topper if ((TmpSeq.size() + 1) < Res.size() || 219cbd45961SCraig Topper (Res.empty() && TmpSeq.size() < 8)) { 220cbd45961SCraig Topper TmpSeq.emplace_back(RISCV::ADD_UW, 0); 221cbd45961SCraig Topper Res = TmpSeq; 222cbd45961SCraig Topper } 223cbd45961SCraig Topper } 224cbd45961SCraig Topper } 225cbd45961SCraig Topper 2260fe5f03eSjacquesguan namespace llvm::RISCVMatInt { 227e179b125SWang Pengcheng InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI) { 2282c52d516SCraig Topper RISCVMatInt::InstSeq Res; 229e179b125SWang Pengcheng generateInstSeqImpl(Val, STI, Res); 230d61b40edSCraig Topper 231ce66f4d0SCraig Topper // If the low 12 bits are non-zero, the first expansion may end with an ADDI 232ce66f4d0SCraig Topper // or ADDIW. If there are trailing zeros, try generating a sign extended 233ce66f4d0SCraig Topper // constant with no trailing zeros and use a final SLLI to restore them. 2341806ce90SCraig Topper if ((Val & 0xfff) != 0 && (Val & 1) == 0 && Res.size() >= 2) { 235e0782018SKazu Hirata unsigned TrailingZeros = llvm::countr_zero((uint64_t)Val); 23698b86689SCraig Topper int64_t ShiftedVal = Val >> TrailingZeros; 2371806ce90SCraig Topper // If we can use C.LI+C.SLLI instead of LUI+ADDI(W) prefer that since 2381806ce90SCraig Topper // its more compressible. But only if LUI+ADDI(W) isn't fusable. 2391806ce90SCraig Topper // NOTE: We don't check for C extension to minimize differences in generated 2401806ce90SCraig Topper // code. 2411806ce90SCraig Topper bool IsShiftedCompressible = 242e179b125SWang Pengcheng isInt<6>(ShiftedVal) && !STI.hasFeature(RISCV::TuneLUIADDIFusion); 2432c52d516SCraig Topper RISCVMatInt::InstSeq TmpSeq; 244e179b125SWang Pengcheng generateInstSeqImpl(ShiftedVal, STI, TmpSeq); 24598b86689SCraig Topper 24698b86689SCraig Topper // Keep the new sequence if it is an improvement. 247b41e0fb4SCraig Topper if ((TmpSeq.size() + 1) < Res.size() || IsShiftedCompressible) { 248b41e0fb4SCraig Topper TmpSeq.emplace_back(RISCV::SLLI, TrailingZeros); 24998b86689SCraig Topper Res = TmpSeq; 25098b86689SCraig Topper } 251b41e0fb4SCraig Topper } 25298b86689SCraig Topper 2534f5f38bdSCraig Topper // If we have a 1 or 2 instruction sequence this is the best we can do. This 2544f5f38bdSCraig Topper // will always be true for RV32 and will often be true for RV64. 2554f5f38bdSCraig Topper if (Res.size() <= 2) 2564f5f38bdSCraig Topper return Res; 2574f5f38bdSCraig Topper 258e179b125SWang Pengcheng assert(STI.hasFeature(RISCV::Feature64Bit) && 2594dbb7880SCraig Topper "Expected RV32 to only need 2 instructions"); 2604f5f38bdSCraig Topper 261c75e3ea4SCraig Topper // If the lower 13 bits are something like 0x17ff, try to add 1 to change the 262c75e3ea4SCraig Topper // lower 13 bits to 0x1800. We can restore this with an ADDI of -1 at the end 263c75e3ea4SCraig Topper // of the sequence. Call generateInstSeqImpl on the new constant which may 264c75e3ea4SCraig Topper // subtract 0xfffffffffffff800 to create another ADDI. This will leave a 265c75e3ea4SCraig Topper // constant with more than 12 trailing zeros for the next recursive step. 266c75e3ea4SCraig Topper if ((Val & 0xfff) != 0 && (Val & 0x1800) == 0x1000) { 267c75e3ea4SCraig Topper int64_t Imm12 = -(0x800 - (Val & 0xfff)); 268c75e3ea4SCraig Topper int64_t AdjustedVal = Val - Imm12; 269c75e3ea4SCraig Topper RISCVMatInt::InstSeq TmpSeq; 270e179b125SWang Pengcheng generateInstSeqImpl(AdjustedVal, STI, TmpSeq); 271c75e3ea4SCraig Topper 272c75e3ea4SCraig Topper // Keep the new sequence if it is an improvement. 273c75e3ea4SCraig Topper if ((TmpSeq.size() + 1) < Res.size()) { 274c75e3ea4SCraig Topper TmpSeq.emplace_back(RISCV::ADDI, Imm12); 275c75e3ea4SCraig Topper Res = TmpSeq; 276c75e3ea4SCraig Topper } 277c75e3ea4SCraig Topper } 278c75e3ea4SCraig Topper 2794f5f38bdSCraig Topper // If the constant is positive we might be able to generate a shifted constant 2804f5f38bdSCraig Topper // with no leading zeros and use a final SRLI to restore them. 281c75e3ea4SCraig Topper if (Val > 0 && Res.size() > 2) { 282e179b125SWang Pengcheng generateInstSeqLeadingZeros(Val, STI, Res); 283cbd45961SCraig Topper } 284ea064ba6SCraig Topper 285cbd45961SCraig Topper // If the constant is negative, trying inverting and using our trailing zero 286cbd45961SCraig Topper // optimizations. Use an xori to invert the final value. 287cbd45961SCraig Topper if (Val < 0 && Res.size() > 3) { 288cbd45961SCraig Topper uint64_t InvertedVal = ~(uint64_t)Val; 289ea064ba6SCraig Topper RISCVMatInt::InstSeq TmpSeq; 290e179b125SWang Pengcheng generateInstSeqLeadingZeros(InvertedVal, STI, TmpSeq); 291ea064ba6SCraig Topper 292cbd45961SCraig Topper // Keep it if we found a sequence that is smaller after inverting. 293cbd45961SCraig Topper if (!TmpSeq.empty() && (TmpSeq.size() + 1) < Res.size()) { 294cbd45961SCraig Topper TmpSeq.emplace_back(RISCV::XORI, -1); 295ea064ba6SCraig Topper Res = TmpSeq; 296a8b8e947SCraig Topper } 297b41e0fb4SCraig Topper } 298d61b40edSCraig Topper 299bb106125SCraig Topper // If the Low and High halves are the same, use pack. The pack instruction 300bb106125SCraig Topper // packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in the 301bb106125SCraig Topper // lower half and rs2 in the upper half. 302e179b125SWang Pengcheng if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbkb)) { 303bb106125SCraig Topper int64_t LoVal = SignExtend64<32>(Val); 304bb106125SCraig Topper int64_t HiVal = SignExtend64<32>(Val >> 32); 305bb106125SCraig Topper if (LoVal == HiVal) { 306bb106125SCraig Topper RISCVMatInt::InstSeq TmpSeq; 307e179b125SWang Pengcheng generateInstSeqImpl(LoVal, STI, TmpSeq); 308b41e0fb4SCraig Topper if ((TmpSeq.size() + 1) < Res.size()) { 309bb106125SCraig Topper TmpSeq.emplace_back(RISCV::PACK, 0); 310bb106125SCraig Topper Res = TmpSeq; 311bb106125SCraig Topper } 312bb106125SCraig Topper } 313b41e0fb4SCraig Topper } 314bb106125SCraig Topper 3155baf58b6SCraig Topper // Perform optimization with BSETI in the Zbs extension. 316e179b125SWang Pengcheng if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) { 3175baf58b6SCraig Topper // Create a simm32 value for LUI+ADDIW by forcing the upper 33 bits to zero. 3185baf58b6SCraig Topper // Xor that with original value to get which bits should be set by BSETI. 3195baf58b6SCraig Topper uint64_t Lo = Val & 0x7fffffff; 3205baf58b6SCraig Topper uint64_t Hi = Val ^ Lo; 3215baf58b6SCraig Topper assert(Hi != 0); 3222c52d516SCraig Topper RISCVMatInt::InstSeq TmpSeq; 3235baf58b6SCraig Topper 3245baf58b6SCraig Topper if (Lo != 0) 3255baf58b6SCraig Topper generateInstSeqImpl(Lo, STI, TmpSeq); 3265baf58b6SCraig Topper 3275baf58b6SCraig Topper if (TmpSeq.size() + llvm::popcount(Hi) < Res.size()) { 3285baf58b6SCraig Topper do { 3295baf58b6SCraig Topper TmpSeq.emplace_back(RISCV::BSETI, llvm::countr_zero(Hi)); 3305baf58b6SCraig Topper Hi &= (Hi - 1); // Clear lowest set bit. 3315baf58b6SCraig Topper } while (Hi != 0); 332787eeb85SBen Shi Res = TmpSeq; 333787eeb85SBen Shi } 334b41e0fb4SCraig Topper } 3357e815261SBen Shi 3365baf58b6SCraig Topper // Perform optimization with BCLRI in the Zbs extension. 3375baf58b6SCraig Topper if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) { 3385baf58b6SCraig Topper // Create a simm32 value for LUI+ADDIW by forcing the upper 33 bits to one. 3395baf58b6SCraig Topper // Xor that with original value to get which bits should be cleared by 3405baf58b6SCraig Topper // BCLRI. 3415baf58b6SCraig Topper uint64_t Lo = Val | 0xffffffff80000000; 3425baf58b6SCraig Topper uint64_t Hi = Val ^ Lo; 3435baf58b6SCraig Topper assert(Hi != 0); 3445baf58b6SCraig Topper 3452c52d516SCraig Topper RISCVMatInt::InstSeq TmpSeq; 346e179b125SWang Pengcheng generateInstSeqImpl(Lo, STI, TmpSeq); 3475baf58b6SCraig Topper 3485baf58b6SCraig Topper if (TmpSeq.size() + llvm::popcount(Hi) < Res.size()) { 3495baf58b6SCraig Topper do { 3505baf58b6SCraig Topper TmpSeq.emplace_back(RISCV::BCLRI, llvm::countr_zero(Hi)); 35147ff3042SCraig Topper Hi &= (Hi - 1); // Clear lowest set bit. 3525baf58b6SCraig Topper } while (Hi != 0); 3537e815261SBen Shi Res = TmpSeq; 3547e815261SBen Shi } 355787eeb85SBen Shi } 356787eeb85SBen Shi 3574fe5ab4bSBen Shi // Perform optimization with SH*ADD in the Zba extension. 358e179b125SWang Pengcheng if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZba)) { 3594fe5ab4bSBen Shi int64_t Div = 0; 3604fe5ab4bSBen Shi unsigned Opc = 0; 3612c52d516SCraig Topper RISCVMatInt::InstSeq TmpSeq; 3624fe5ab4bSBen Shi // Select the opcode and divisor. 3634fe5ab4bSBen Shi if ((Val % 3) == 0 && isInt<32>(Val / 3)) { 3644fe5ab4bSBen Shi Div = 3; 3654fe5ab4bSBen Shi Opc = RISCV::SH1ADD; 3664fe5ab4bSBen Shi } else if ((Val % 5) == 0 && isInt<32>(Val / 5)) { 3674fe5ab4bSBen Shi Div = 5; 3684fe5ab4bSBen Shi Opc = RISCV::SH2ADD; 3694fe5ab4bSBen Shi } else if ((Val % 9) == 0 && isInt<32>(Val / 9)) { 3704fe5ab4bSBen Shi Div = 9; 3714fe5ab4bSBen Shi Opc = RISCV::SH3ADD; 3724fe5ab4bSBen Shi } 3734fe5ab4bSBen Shi // Build the new instruction sequence. 3744fe5ab4bSBen Shi if (Div > 0) { 375e179b125SWang Pengcheng generateInstSeqImpl(Val / Div, STI, TmpSeq); 376b41e0fb4SCraig Topper if ((TmpSeq.size() + 1) < Res.size()) { 377c8c1d7afSCraig Topper TmpSeq.emplace_back(Opc, 0); 3784fe5ab4bSBen Shi Res = TmpSeq; 379b41e0fb4SCraig Topper } 38070046438SCraig Topper } else { 3814c3d916cSBen Shi // Try to use LUI+SH*ADD+ADDI. 3824c3d916cSBen Shi int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull; 3834c3d916cSBen Shi int64_t Lo12 = SignExtend64<12>(Val); 3844c3d916cSBen Shi Div = 0; 3854c3d916cSBen Shi if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) { 3864c3d916cSBen Shi Div = 3; 3874c3d916cSBen Shi Opc = RISCV::SH1ADD; 3884c3d916cSBen Shi } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) { 3894c3d916cSBen Shi Div = 5; 3904c3d916cSBen Shi Opc = RISCV::SH2ADD; 3914c3d916cSBen Shi } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) { 3924c3d916cSBen Shi Div = 9; 3934c3d916cSBen Shi Opc = RISCV::SH3ADD; 3944c3d916cSBen Shi } 3954c3d916cSBen Shi // Build the new instruction sequence. 3964c3d916cSBen Shi if (Div > 0) { 3974c3d916cSBen Shi // For Val that has zero Lo12 (implies Val equals to Hi52) should has 3984c3d916cSBen Shi // already been processed to LUI+SH*ADD by previous optimization. 3994c3d916cSBen Shi assert(Lo12 != 0 && 4004c3d916cSBen Shi "unexpected instruction sequence for immediate materialisation"); 4012c52d516SCraig Topper assert(TmpSeq.empty() && "Expected empty TmpSeq"); 402e179b125SWang Pengcheng generateInstSeqImpl(Hi52 / Div, STI, TmpSeq); 403b41e0fb4SCraig Topper if ((TmpSeq.size() + 2) < Res.size()) { 404c8c1d7afSCraig Topper TmpSeq.emplace_back(Opc, 0); 405c8c1d7afSCraig Topper TmpSeq.emplace_back(RISCV::ADDI, Lo12); 4064c3d916cSBen Shi Res = TmpSeq; 4074c3d916cSBen Shi } 4084fe5ab4bSBen Shi } 40970046438SCraig Topper } 410b41e0fb4SCraig Topper } 4114fe5ab4bSBen Shi 412fc02eeb2SPhilipp Tomsich // Perform optimization with rori in the Zbb and th.srri in the XTheadBb 413fc02eeb2SPhilipp Tomsich // extension. 414e179b125SWang Pengcheng if (Res.size() > 2 && (STI.hasFeature(RISCV::FeatureStdExtZbb) || 415e179b125SWang Pengcheng STI.hasFeature(RISCV::FeatureVendorXTHeadBb))) { 416af931a51SBaoshan Pang if (unsigned Rotate = extractRotateInfo(Val)) { 417af931a51SBaoshan Pang RISCVMatInt::InstSeq TmpSeq; 418639b7865SKazu Hirata uint64_t NegImm12 = llvm::rotl<uint64_t>(Val, Rotate); 419af931a51SBaoshan Pang assert(isInt<12>(NegImm12)); 420c8c1d7afSCraig Topper TmpSeq.emplace_back(RISCV::ADDI, NegImm12); 421e179b125SWang Pengcheng TmpSeq.emplace_back(STI.hasFeature(RISCV::FeatureStdExtZbb) 422fc02eeb2SPhilipp Tomsich ? RISCV::RORI 423fc02eeb2SPhilipp Tomsich : RISCV::TH_SRRI, 424fc02eeb2SPhilipp Tomsich Rotate); 425af931a51SBaoshan Pang Res = TmpSeq; 426af931a51SBaoshan Pang } 427af931a51SBaoshan Pang } 428d61b40edSCraig Topper return Res; 429387d3c24SCraig Topper } 430387d3c24SCraig Topper 431d2f8ba7dSSacha Coppey void generateMCInstSeq(int64_t Val, const MCSubtargetInfo &STI, 432d2f8ba7dSSacha Coppey MCRegister DestReg, SmallVectorImpl<MCInst> &Insts) { 433d2f8ba7dSSacha Coppey RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI); 434d2f8ba7dSSacha Coppey 435d2f8ba7dSSacha Coppey MCRegister SrcReg = RISCV::X0; 436d2f8ba7dSSacha Coppey for (RISCVMatInt::Inst &Inst : Seq) { 437d2f8ba7dSSacha Coppey switch (Inst.getOpndKind()) { 438d2f8ba7dSSacha Coppey case RISCVMatInt::Imm: 439d2f8ba7dSSacha Coppey Insts.push_back(MCInstBuilder(Inst.getOpcode()) 440d2f8ba7dSSacha Coppey .addReg(DestReg) 441d2f8ba7dSSacha Coppey .addImm(Inst.getImm())); 442d2f8ba7dSSacha Coppey break; 443d2f8ba7dSSacha Coppey case RISCVMatInt::RegX0: 444d2f8ba7dSSacha Coppey Insts.push_back(MCInstBuilder(Inst.getOpcode()) 445d2f8ba7dSSacha Coppey .addReg(DestReg) 446d2f8ba7dSSacha Coppey .addReg(SrcReg) 447d2f8ba7dSSacha Coppey .addReg(RISCV::X0)); 448d2f8ba7dSSacha Coppey break; 449d2f8ba7dSSacha Coppey case RISCVMatInt::RegReg: 450d2f8ba7dSSacha Coppey Insts.push_back(MCInstBuilder(Inst.getOpcode()) 451d2f8ba7dSSacha Coppey .addReg(DestReg) 452d2f8ba7dSSacha Coppey .addReg(SrcReg) 453d2f8ba7dSSacha Coppey .addReg(SrcReg)); 454d2f8ba7dSSacha Coppey break; 455d2f8ba7dSSacha Coppey case RISCVMatInt::RegImm: 456d2f8ba7dSSacha Coppey Insts.push_back(MCInstBuilder(Inst.getOpcode()) 457d2f8ba7dSSacha Coppey .addReg(DestReg) 458d2f8ba7dSSacha Coppey .addReg(SrcReg) 459d2f8ba7dSSacha Coppey .addImm(Inst.getImm())); 460d2f8ba7dSSacha Coppey break; 461d2f8ba7dSSacha Coppey } 462d2f8ba7dSSacha Coppey 463d2f8ba7dSSacha Coppey // Only the first instruction has X0 as its source. 464d2f8ba7dSSacha Coppey SrcReg = DestReg; 465d2f8ba7dSSacha Coppey } 466d2f8ba7dSSacha Coppey } 467d2f8ba7dSSacha Coppey 468e179b125SWang Pengcheng InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, 4693c0990c1SCraig Topper unsigned &ShiftAmt, unsigned &AddOpc) { 4703c0990c1SCraig Topper int64_t LoVal = SignExtend64<32>(Val); 4713c0990c1SCraig Topper if (LoVal == 0) 4723c0990c1SCraig Topper return RISCVMatInt::InstSeq(); 4733c0990c1SCraig Topper 4743c0990c1SCraig Topper // Subtract the LoVal to emulate the effect of the final ADD. 4753c0990c1SCraig Topper uint64_t Tmp = (uint64_t)Val - (uint64_t)LoVal; 4763c0990c1SCraig Topper assert(Tmp != 0); 4773c0990c1SCraig Topper 4783c0990c1SCraig Topper // Use trailing zero counts to figure how far we need to shift LoVal to line 4793c0990c1SCraig Topper // up with the remaining constant. 4803c0990c1SCraig Topper // TODO: This algorithm assumes all non-zero bits in the low 32 bits of the 4813c0990c1SCraig Topper // final constant come from LoVal. 4823c0990c1SCraig Topper unsigned TzLo = llvm::countr_zero((uint64_t)LoVal); 4833c0990c1SCraig Topper unsigned TzHi = llvm::countr_zero(Tmp); 4843c0990c1SCraig Topper assert(TzLo < 32 && TzHi >= 32); 4853c0990c1SCraig Topper ShiftAmt = TzHi - TzLo; 4863c0990c1SCraig Topper AddOpc = RISCV::ADD; 4873c0990c1SCraig Topper 4883c0990c1SCraig Topper if (Tmp == ((uint64_t)LoVal << ShiftAmt)) 489e179b125SWang Pengcheng return RISCVMatInt::generateInstSeq(LoVal, STI); 4903c0990c1SCraig Topper 4913c0990c1SCraig Topper // If we have Zba, we can use (ADD_UW X, (SLLI X, 32)). 492e179b125SWang Pengcheng if (STI.hasFeature(RISCV::FeatureStdExtZba) && Lo_32(Val) == Hi_32(Val)) { 4933c0990c1SCraig Topper ShiftAmt = 32; 4943c0990c1SCraig Topper AddOpc = RISCV::ADD_UW; 495e179b125SWang Pengcheng return RISCVMatInt::generateInstSeq(LoVal, STI); 4963c0990c1SCraig Topper } 4973c0990c1SCraig Topper 4983c0990c1SCraig Topper return RISCVMatInt::InstSeq(); 4993c0990c1SCraig Topper } 5003c0990c1SCraig Topper 501e179b125SWang Pengcheng int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, 502*8687f7cdSAlex Bradbury bool CompressionCost, bool FreeZeroes) { 503e179b125SWang Pengcheng bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit); 504e179b125SWang Pengcheng bool HasRVC = CompressionCost && (STI.hasFeature(RISCV::FeatureStdExtC) || 505e179b125SWang Pengcheng STI.hasFeature(RISCV::FeatureStdExtZca)); 506387d3c24SCraig Topper int PlatRegSize = IsRV64 ? 64 : 32; 507387d3c24SCraig Topper 508387d3c24SCraig Topper // Split the constant into platform register sized chunks, and calculate cost 509387d3c24SCraig Topper // of each chunk. 510387d3c24SCraig Topper int Cost = 0; 511387d3c24SCraig Topper for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) { 512387d3c24SCraig Topper APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize); 513*8687f7cdSAlex Bradbury if (FreeZeroes && Chunk.getSExtValue() == 0) 514*8687f7cdSAlex Bradbury continue; 515e179b125SWang Pengcheng InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), STI); 51681efb825SCraig Topper Cost += getInstSeqCost(MatSeq, HasRVC); 517387d3c24SCraig Topper } 518*8687f7cdSAlex Bradbury return std::max(FreeZeroes ? 0 : 1, Cost); 519387d3c24SCraig Topper } 520d2ee2c9cSCraig Topper 521d2ee2c9cSCraig Topper OpndKind Inst::getOpndKind() const { 522d2ee2c9cSCraig Topper switch (Opc) { 523d2ee2c9cSCraig Topper default: 524d2ee2c9cSCraig Topper llvm_unreachable("Unexpected opcode!"); 525d2ee2c9cSCraig Topper case RISCV::LUI: 526d2ee2c9cSCraig Topper return RISCVMatInt::Imm; 527d2ee2c9cSCraig Topper case RISCV::ADD_UW: 528d2ee2c9cSCraig Topper return RISCVMatInt::RegX0; 529d2ee2c9cSCraig Topper case RISCV::SH1ADD: 530d2ee2c9cSCraig Topper case RISCV::SH2ADD: 531d2ee2c9cSCraig Topper case RISCV::SH3ADD: 532bb106125SCraig Topper case RISCV::PACK: 533d2ee2c9cSCraig Topper return RISCVMatInt::RegReg; 534d2ee2c9cSCraig Topper case RISCV::ADDI: 535d2ee2c9cSCraig Topper case RISCV::ADDIW: 536cbd45961SCraig Topper case RISCV::XORI: 537d2ee2c9cSCraig Topper case RISCV::SLLI: 538d2ee2c9cSCraig Topper case RISCV::SRLI: 539d2ee2c9cSCraig Topper case RISCV::SLLI_UW: 540d2ee2c9cSCraig Topper case RISCV::RORI: 541d2ee2c9cSCraig Topper case RISCV::BSETI: 542d2ee2c9cSCraig Topper case RISCV::BCLRI: 543fc02eeb2SPhilipp Tomsich case RISCV::TH_SRRI: 544d2ee2c9cSCraig Topper return RISCVMatInt::RegImm; 545d2ee2c9cSCraig Topper } 546d2ee2c9cSCraig Topper } 547d2ee2c9cSCraig Topper 5480fe5f03eSjacquesguan } // namespace llvm::RISCVMatInt 549