xref: /llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp (revision 8687f7cd662384e3bd009a0f43eabbbe87f4387a)
1387d3c24SCraig Topper //===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
2387d3c24SCraig Topper //
3387d3c24SCraig Topper // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4387d3c24SCraig Topper // See https://llvm.org/LICENSE.txt for license information.
5387d3c24SCraig Topper // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6387d3c24SCraig Topper //
7387d3c24SCraig Topper //===----------------------------------------------------------------------===//
8387d3c24SCraig Topper 
9387d3c24SCraig Topper #include "RISCVMatInt.h"
10387d3c24SCraig Topper #include "MCTargetDesc/RISCVMCTargetDesc.h"
11387d3c24SCraig Topper #include "llvm/ADT/APInt.h"
12d2f8ba7dSSacha Coppey #include "llvm/MC/MCInstBuilder.h"
13387d3c24SCraig Topper #include "llvm/Support/MathExtras.h"
14d61b40edSCraig Topper using namespace llvm;
15387d3c24SCraig Topper 
1681efb825SCraig Topper static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
1781efb825SCraig Topper   if (!HasRVC)
1881efb825SCraig Topper     return Res.size();
1981efb825SCraig Topper 
2081efb825SCraig Topper   int Cost = 0;
2181efb825SCraig Topper   for (auto Instr : Res) {
22186d5c8aSCraig Topper     // Assume instructions that aren't listed aren't compressible.
23186d5c8aSCraig Topper     bool Compressed = false;
24f2ffdbebSCraig Topper     switch (Instr.getOpcode()) {
2581efb825SCraig Topper     case RISCV::SLLI:
2681efb825SCraig Topper     case RISCV::SRLI:
2781efb825SCraig Topper       Compressed = true;
2881efb825SCraig Topper       break;
2981efb825SCraig Topper     case RISCV::ADDI:
3081efb825SCraig Topper     case RISCV::ADDIW:
3181efb825SCraig Topper     case RISCV::LUI:
32f2ffdbebSCraig Topper       Compressed = isInt<6>(Instr.getImm());
3381efb825SCraig Topper       break;
3481efb825SCraig Topper     }
3581efb825SCraig Topper     // Two RVC instructions take the same space as one RVI instruction, but
3681efb825SCraig Topper     // can take longer to execute than the single RVI instruction. Thus, we
3781efb825SCraig Topper     // consider that two RVC instruction are slightly more costly than one
3881efb825SCraig Topper     // RVI instruction. For longer sequences of RVC instructions the space
3981efb825SCraig Topper     // savings can be worth it, though. The costs below try to model that.
4081efb825SCraig Topper     if (!Compressed)
4181efb825SCraig Topper       Cost += 100; // Baseline cost of one RVI instruction: 100%.
4281efb825SCraig Topper     else
4381efb825SCraig Topper       Cost += 70; // 70% cost of baseline.
4481efb825SCraig Topper   }
4581efb825SCraig Topper   return Cost;
4681efb825SCraig Topper }
4781efb825SCraig Topper 
// Recursively generate a sequence for materializing an integer constant Val
// into a register, appending the chosen instructions to Res.
static void generateInstSeqImpl(int64_t Val, const MCSubtargetInfo &STI,
                                RISCVMatInt::InstSeq &Res) {
  bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);

  // Use BSETI for a single bit that can't be expressed by a single LUI or ADDI.
  // (0x800 is the one simm32 power of two not reachable by LUI or ADDI alone,
  // since ADDI sign-extends its 12-bit immediate.)
  if (STI.hasFeature(RISCV::FeatureStdExtZbs) && isPowerOf2_64(Val) &&
      (!isInt<32>(Val) || Val == 0x800)) {
    Res.emplace_back(RISCV::BSETI, Log2_64(Val));
    return;
  }

  if (isInt<32>(Val)) {
    // Depending on the active bits in the immediate Value v, the following
    // instruction sequences are emitted:
    //
    // v == 0                        : ADDI
    // v[0,12) != 0 && v[12,32) == 0 : ADDI
    // v[0,12) == 0 && v[12,32) != 0 : LUI
    // v[0,32) != 0                  : LUI+ADDI(W)
    //
    // Adding 0x800 rounds Hi20 up when Lo12 is negative, compensating for the
    // sign-extended ADDI immediate that follows.
    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
    int64_t Lo12 = SignExtend64<12>(Val);

    if (Hi20)
      Res.emplace_back(RISCV::LUI, Hi20);

    if (Lo12 || Hi20 == 0) {
      // On RV64 an ADDIW after LUI keeps the result properly sign-extended
      // from bit 31.
      unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
      Res.emplace_back(AddiOpc, Lo12);
    }
    return;
  }

  assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");

  // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
  // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
  // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
  // while the following ADDI instructions contribute up to 12 bits each.
  //
  // On the first glance, implementing this seems to be possible by simply
  // emitting the most significant 32 bits (LUI+ADDIW) followed by as many left
  // shift (SLLI) and immediate additions (ADDI) as needed. However, due to the
  // fact that ADDI performs a sign extended addition, doing it like that would
  // only be possible when at most 11 bits of the ADDI instructions are used.
  // Using all 12 bits of the ADDI instructions, like done by GAS, actually
  // requires that the constant is processed starting with the least significant
  // bit.
  //
  // In the following, constants are processed from LSB to MSB but instruction
  // emission is performed from MSB to LSB by recursively calling
  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
  // from the constant and the optimal shift amount, which can be greater than
  // 12 bits if the constant is sparse, is determined. Then, the shifted
  // remaining constant is processed recursively and gets emitted as soon as it
  // fits into 32 bits. The emission of the shifts and additions is subsequently
  // performed when the recursion returns.

  int64_t Lo12 = SignExtend64<12>(Val);
  // Subtract in unsigned arithmetic to avoid UB on signed overflow.
  Val = (uint64_t)Val - (uint64_t)Lo12;

  int ShiftAmount = 0;
  bool Unsigned = false;

  // Val might now be valid for LUI without needing a shift.
  if (!isInt<32>(Val)) {
    ShiftAmount = llvm::countr_zero((uint64_t)Val);
    Val >>= ShiftAmount;

    // If the remaining bits don't fit in 12 bits, we might be able to reduce
    // the shift amount in order to use LUI which will zero the lower 12 bits.
    if (ShiftAmount > 12 && !isInt<12>(Val)) {
      if (isInt<32>((uint64_t)Val << 12)) {
        // Reduce the shift amount and add zeros to the LSBs so it will match
        // LUI.
        ShiftAmount -= 12;
        Val = (uint64_t)Val << 12;
      } else if (isUInt<32>((uint64_t)Val << 12) &&
                 STI.hasFeature(RISCV::FeatureStdExtZba)) {
        // Reduce the shift amount and add zeros to the LSBs so it will match
        // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
        ShiftAmount -= 12;
        Val = ((uint64_t)Val << 12) | (0xffffffffull << 32);
        Unsigned = true;
      }
    }

    // Try to use SLLI_UW for Val when it is uint32 but not int32.
    if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) &&
        STI.hasFeature(RISCV::FeatureStdExtZba)) {
      // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
      // SLLI_UW.
      Val = ((uint64_t)Val) | (0xffffffffull << 32);
      Unsigned = true;
    }
  }

  // Recurse on the (now simpler) remaining constant; it is emitted first.
  generateInstSeqImpl(Val, STI, Res);

  // Skip shift if we were able to use LUI directly.
  if (ShiftAmount) {
    unsigned Opc = Unsigned ? RISCV::SLLI_UW : RISCV::SLLI;
    Res.emplace_back(Opc, ShiftAmount);
  }

  if (Lo12)
    Res.emplace_back(RISCV::ADDI, Lo12);
}
157d61b40edSCraig Topper 
158af931a51SBaoshan Pang static unsigned extractRotateInfo(int64_t Val) {
159af931a51SBaoshan Pang   // for case: 0b111..1..xxxxxx1..1..
160e0782018SKazu Hirata   unsigned LeadingOnes = llvm::countl_one((uint64_t)Val);
161e0782018SKazu Hirata   unsigned TrailingOnes = llvm::countr_one((uint64_t)Val);
162af931a51SBaoshan Pang   if (TrailingOnes > 0 && TrailingOnes < 64 &&
163af931a51SBaoshan Pang       (LeadingOnes + TrailingOnes) > (64 - 12))
164af931a51SBaoshan Pang     return 64 - TrailingOnes;
165af931a51SBaoshan Pang 
166af931a51SBaoshan Pang   // for case: 0bxxx1..1..1...xxx
167e0782018SKazu Hirata   unsigned UpperTrailingOnes = llvm::countr_one(Hi_32(Val));
168e0782018SKazu Hirata   unsigned LowerLeadingOnes = llvm::countl_one(Lo_32(Val));
169af931a51SBaoshan Pang   if (UpperTrailingOnes < 32 &&
170af931a51SBaoshan Pang       (UpperTrailingOnes + LowerLeadingOnes) > (64 - 12))
171af931a51SBaoshan Pang     return 32 - UpperTrailingOnes;
172af931a51SBaoshan Pang 
173af931a51SBaoshan Pang   return 0;
174af931a51SBaoshan Pang }
175af931a51SBaoshan Pang 
176e179b125SWang Pengcheng static void generateInstSeqLeadingZeros(int64_t Val, const MCSubtargetInfo &STI,
177cbd45961SCraig Topper                                         RISCVMatInt::InstSeq &Res) {
178cbd45961SCraig Topper   assert(Val > 0 && "Expected postive val");
179cbd45961SCraig Topper 
180cbd45961SCraig Topper   unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
181cbd45961SCraig Topper   uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
182cbd45961SCraig Topper   // Fill in the bits that will be shifted out with 1s. An example where this
183cbd45961SCraig Topper   // helps is trailing one masks with 32 or more ones. This will generate
184cbd45961SCraig Topper   // ADDI -1 and an SRLI.
185cbd45961SCraig Topper   ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
186cbd45961SCraig Topper 
187cbd45961SCraig Topper   RISCVMatInt::InstSeq TmpSeq;
188e179b125SWang Pengcheng   generateInstSeqImpl(ShiftedVal, STI, TmpSeq);
189cbd45961SCraig Topper 
190cbd45961SCraig Topper   // Keep the new sequence if it is an improvement or the original is empty.
191cbd45961SCraig Topper   if ((TmpSeq.size() + 1) < Res.size() ||
192cbd45961SCraig Topper       (Res.empty() && TmpSeq.size() < 8)) {
193cbd45961SCraig Topper     TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
194cbd45961SCraig Topper     Res = TmpSeq;
195cbd45961SCraig Topper   }
196cbd45961SCraig Topper 
197cbd45961SCraig Topper   // Some cases can benefit from filling the lower bits with zeros instead.
198cbd45961SCraig Topper   ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
199cbd45961SCraig Topper   TmpSeq.clear();
200e179b125SWang Pengcheng   generateInstSeqImpl(ShiftedVal, STI, TmpSeq);
201cbd45961SCraig Topper 
202cbd45961SCraig Topper   // Keep the new sequence if it is an improvement or the original is empty.
203cbd45961SCraig Topper   if ((TmpSeq.size() + 1) < Res.size() ||
204cbd45961SCraig Topper       (Res.empty() && TmpSeq.size() < 8)) {
205cbd45961SCraig Topper     TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
206cbd45961SCraig Topper     Res = TmpSeq;
207cbd45961SCraig Topper   }
208cbd45961SCraig Topper 
209cbd45961SCraig Topper   // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
210cbd45961SCraig Topper   // the end of the sequence.
211e179b125SWang Pengcheng   if (LeadingZeros == 32 && STI.hasFeature(RISCV::FeatureStdExtZba)) {
212cbd45961SCraig Topper     // Try replacing upper bits with 1.
213cbd45961SCraig Topper     uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
214cbd45961SCraig Topper     TmpSeq.clear();
215e179b125SWang Pengcheng     generateInstSeqImpl(LeadingOnesVal, STI, TmpSeq);
216cbd45961SCraig Topper 
217cbd45961SCraig Topper     // Keep the new sequence if it is an improvement.
218cbd45961SCraig Topper     if ((TmpSeq.size() + 1) < Res.size() ||
219cbd45961SCraig Topper         (Res.empty() && TmpSeq.size() < 8)) {
220cbd45961SCraig Topper       TmpSeq.emplace_back(RISCV::ADD_UW, 0);
221cbd45961SCraig Topper       Res = TmpSeq;
222cbd45961SCraig Topper     }
223cbd45961SCraig Topper   }
224cbd45961SCraig Topper }
225cbd45961SCraig Topper 
2260fe5f03eSjacquesguan namespace llvm::RISCVMatInt {
// Produce the best known instruction sequence materializing Val for the given
// subtarget. Starts from the baseline recursive expansion and then tries a
// series of extension-specific rewrites, keeping each candidate only when it
// is strictly shorter than the current best.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI) {
  RISCVMatInt::InstSeq Res;
  generateInstSeqImpl(Val, STI, Res);

  // If the low 12 bits are non-zero, the first expansion may end with an ADDI
  // or ADDIW. If there are trailing zeros, try generating a sign extended
  // constant with no trailing zeros and use a final SLLI to restore them.
  if ((Val & 0xfff) != 0 && (Val & 1) == 0 && Res.size() >= 2) {
    unsigned TrailingZeros = llvm::countr_zero((uint64_t)Val);
    int64_t ShiftedVal = Val >> TrailingZeros;
    // If we can use C.LI+C.SLLI instead of LUI+ADDI(W) prefer that since
    // its more compressible. But only if LUI+ADDI(W) isn't fusable.
    // NOTE: We don't check for C extension to minimize differences in generated
    // code.
    bool IsShiftedCompressible =
        isInt<6>(ShiftedVal) && !STI.hasFeature(RISCV::TuneLUIADDIFusion);
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(ShiftedVal, STI, TmpSeq);

    // Keep the new sequence if it is an improvement.
    if ((TmpSeq.size() + 1) < Res.size() || IsShiftedCompressible) {
      TmpSeq.emplace_back(RISCV::SLLI, TrailingZeros);
      Res = TmpSeq;
    }
  }

  // If we have a 1 or 2 instruction sequence this is the best we can do. This
  // will always be true for RV32 and will often be true for RV64.
  if (Res.size() <= 2)
    return Res;

  assert(STI.hasFeature(RISCV::Feature64Bit) &&
         "Expected RV32 to only need 2 instructions");

  // If the lower 13 bits are something like 0x17ff, try to add 1 to change the
  // lower 13 bits to 0x1800. We can restore this with an ADDI of -1 at the end
  // of the sequence. Call generateInstSeqImpl on the new constant which may
  // subtract 0xfffffffffffff800 to create another ADDI. This will leave a
  // constant with more than 12 trailing zeros for the next recursive step.
  if ((Val & 0xfff) != 0 && (Val & 0x1800) == 0x1000) {
    int64_t Imm12 = -(0x800 - (Val & 0xfff));
    int64_t AdjustedVal = Val - Imm12;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(AdjustedVal, STI, TmpSeq);

    // Keep the new sequence if it is an improvement.
    if ((TmpSeq.size() + 1) < Res.size()) {
      TmpSeq.emplace_back(RISCV::ADDI, Imm12);
      Res = TmpSeq;
    }
  }

  // If the constant is positive we might be able to generate a shifted constant
  // with no leading zeros and use a final SRLI to restore them.
  if (Val > 0 && Res.size() > 2) {
    generateInstSeqLeadingZeros(Val, STI, Res);
  }

  // If the constant is negative, trying inverting and using our trailing zero
  // optimizations. Use an xori to invert the final value.
  if (Val < 0 && Res.size() > 3) {
    uint64_t InvertedVal = ~(uint64_t)Val;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqLeadingZeros(InvertedVal, STI, TmpSeq);

    // Keep it if we found a sequence that is smaller after inverting.
    if (!TmpSeq.empty() && (TmpSeq.size() + 1) < Res.size()) {
      TmpSeq.emplace_back(RISCV::XORI, -1);
      Res = TmpSeq;
    }
  }

  // If the Low and High halves are the same, use pack. The pack instruction
  // packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in the
  // lower half and rs2 in the upper half.
  if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbkb)) {
    int64_t LoVal = SignExtend64<32>(Val);
    int64_t HiVal = SignExtend64<32>(Val >> 32);
    if (LoVal == HiVal) {
      RISCVMatInt::InstSeq TmpSeq;
      generateInstSeqImpl(LoVal, STI, TmpSeq);
      if ((TmpSeq.size() + 1) < Res.size()) {
        TmpSeq.emplace_back(RISCV::PACK, 0);
        Res = TmpSeq;
      }
    }
  }

  // Perform optimization with BSETI in the Zbs extension.
  if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) {
    // Create a simm32 value for LUI+ADDIW by forcing the upper 33 bits to zero.
    // Xor that with original value to get which bits should be set by BSETI.
    uint64_t Lo = Val & 0x7fffffff;
    uint64_t Hi = Val ^ Lo;
    assert(Hi != 0);
    RISCVMatInt::InstSeq TmpSeq;

    if (Lo != 0)
      generateInstSeqImpl(Lo, STI, TmpSeq);

    // One BSETI per remaining set bit; only worthwhile if the total beats Res.
    if (TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
      do {
        TmpSeq.emplace_back(RISCV::BSETI, llvm::countr_zero(Hi));
        Hi &= (Hi - 1); // Clear lowest set bit.
      } while (Hi != 0);
      Res = TmpSeq;
    }
  }

  // Perform optimization with BCLRI in the Zbs extension.
  if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) {
    // Create a simm32 value for LUI+ADDIW by forcing the upper 33 bits to one.
    // Xor that with original value to get which bits should be cleared by
    // BCLRI.
    uint64_t Lo = Val | 0xffffffff80000000;
    uint64_t Hi = Val ^ Lo;
    assert(Hi != 0);

    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(Lo, STI, TmpSeq);

    // One BCLRI per bit to clear; only worthwhile if the total beats Res.
    if (TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
      do {
        TmpSeq.emplace_back(RISCV::BCLRI, llvm::countr_zero(Hi));
        Hi &= (Hi - 1); // Clear lowest set bit.
      } while (Hi != 0);
      Res = TmpSeq;
    }
  }

  // Perform optimization with SH*ADD in the Zba extension.
  if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZba)) {
    int64_t Div = 0;
    unsigned Opc = 0;
    RISCVMatInt::InstSeq TmpSeq;
    // Select the opcode and divisor: SHiADD computes (rs1 << i) + rs2, so with
    // rs1 == rs2 it multiplies by 3, 5 or 9.
    if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
      Div = 3;
      Opc = RISCV::SH1ADD;
    } else if ((Val % 5) == 0 && isInt<32>(Val / 5)) {
      Div = 5;
      Opc = RISCV::SH2ADD;
    } else if ((Val % 9) == 0 && isInt<32>(Val / 9)) {
      Div = 9;
      Opc = RISCV::SH3ADD;
    }
    // Build the new instruction sequence.
    if (Div > 0) {
      generateInstSeqImpl(Val / Div, STI, TmpSeq);
      if ((TmpSeq.size() + 1) < Res.size()) {
        TmpSeq.emplace_back(Opc, 0);
        Res = TmpSeq;
      }
    } else {
      // Try to use LUI+SH*ADD+ADDI.
      int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
      int64_t Lo12 = SignExtend64<12>(Val);
      Div = 0;
      if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
        Div = 3;
        Opc = RISCV::SH1ADD;
      } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
        Div = 5;
        Opc = RISCV::SH2ADD;
      } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
        Div = 9;
        Opc = RISCV::SH3ADD;
      }
      // Build the new instruction sequence.
      if (Div > 0) {
        // For Val that has zero Lo12 (implies Val equals to Hi52) should has
        // already been processed to LUI+SH*ADD by previous optimization.
        assert(Lo12 != 0 &&
               "unexpected instruction sequence for immediate materialisation");
        assert(TmpSeq.empty() && "Expected empty TmpSeq");
        generateInstSeqImpl(Hi52 / Div, STI, TmpSeq);
        if ((TmpSeq.size() + 2) < Res.size()) {
          TmpSeq.emplace_back(Opc, 0);
          TmpSeq.emplace_back(RISCV::ADDI, Lo12);
          Res = TmpSeq;
        }
      }
    }
  }

  // Perform optimization with rori in the Zbb and th.srri in the XTheadBb
  // extension.
  if (Res.size() > 2 && (STI.hasFeature(RISCV::FeatureStdExtZbb) ||
                         STI.hasFeature(RISCV::FeatureVendorXTHeadBb))) {
    if (unsigned Rotate = extractRotateInfo(Val)) {
      RISCVMatInt::InstSeq TmpSeq;
      // Rotating Val left by Rotate must yield a simm12; emit ADDI of that
      // value followed by the rotate-right that undoes it.
      uint64_t NegImm12 = llvm::rotl<uint64_t>(Val, Rotate);
      assert(isInt<12>(NegImm12));
      TmpSeq.emplace_back(RISCV::ADDI, NegImm12);
      TmpSeq.emplace_back(STI.hasFeature(RISCV::FeatureStdExtZbb)
                              ? RISCV::RORI
                              : RISCV::TH_SRRI,
                          Rotate);
      Res = TmpSeq;
    }
  }
  return Res;
}
430387d3c24SCraig Topper 
431d2f8ba7dSSacha Coppey void generateMCInstSeq(int64_t Val, const MCSubtargetInfo &STI,
432d2f8ba7dSSacha Coppey                        MCRegister DestReg, SmallVectorImpl<MCInst> &Insts) {
433d2f8ba7dSSacha Coppey   RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
434d2f8ba7dSSacha Coppey 
435d2f8ba7dSSacha Coppey   MCRegister SrcReg = RISCV::X0;
436d2f8ba7dSSacha Coppey   for (RISCVMatInt::Inst &Inst : Seq) {
437d2f8ba7dSSacha Coppey     switch (Inst.getOpndKind()) {
438d2f8ba7dSSacha Coppey     case RISCVMatInt::Imm:
439d2f8ba7dSSacha Coppey       Insts.push_back(MCInstBuilder(Inst.getOpcode())
440d2f8ba7dSSacha Coppey                           .addReg(DestReg)
441d2f8ba7dSSacha Coppey                           .addImm(Inst.getImm()));
442d2f8ba7dSSacha Coppey       break;
443d2f8ba7dSSacha Coppey     case RISCVMatInt::RegX0:
444d2f8ba7dSSacha Coppey       Insts.push_back(MCInstBuilder(Inst.getOpcode())
445d2f8ba7dSSacha Coppey                           .addReg(DestReg)
446d2f8ba7dSSacha Coppey                           .addReg(SrcReg)
447d2f8ba7dSSacha Coppey                           .addReg(RISCV::X0));
448d2f8ba7dSSacha Coppey       break;
449d2f8ba7dSSacha Coppey     case RISCVMatInt::RegReg:
450d2f8ba7dSSacha Coppey       Insts.push_back(MCInstBuilder(Inst.getOpcode())
451d2f8ba7dSSacha Coppey                           .addReg(DestReg)
452d2f8ba7dSSacha Coppey                           .addReg(SrcReg)
453d2f8ba7dSSacha Coppey                           .addReg(SrcReg));
454d2f8ba7dSSacha Coppey       break;
455d2f8ba7dSSacha Coppey     case RISCVMatInt::RegImm:
456d2f8ba7dSSacha Coppey       Insts.push_back(MCInstBuilder(Inst.getOpcode())
457d2f8ba7dSSacha Coppey                           .addReg(DestReg)
458d2f8ba7dSSacha Coppey                           .addReg(SrcReg)
459d2f8ba7dSSacha Coppey                           .addImm(Inst.getImm()));
460d2f8ba7dSSacha Coppey       break;
461d2f8ba7dSSacha Coppey     }
462d2f8ba7dSSacha Coppey 
463d2f8ba7dSSacha Coppey     // Only the first instruction has X0 as its source.
464d2f8ba7dSSacha Coppey     SrcReg = DestReg;
465d2f8ba7dSSacha Coppey   }
466d2f8ba7dSSacha Coppey }
467d2f8ba7dSSacha Coppey 
468e179b125SWang Pengcheng InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI,
4693c0990c1SCraig Topper                               unsigned &ShiftAmt, unsigned &AddOpc) {
4703c0990c1SCraig Topper   int64_t LoVal = SignExtend64<32>(Val);
4713c0990c1SCraig Topper   if (LoVal == 0)
4723c0990c1SCraig Topper     return RISCVMatInt::InstSeq();
4733c0990c1SCraig Topper 
4743c0990c1SCraig Topper   // Subtract the LoVal to emulate the effect of the final ADD.
4753c0990c1SCraig Topper   uint64_t Tmp = (uint64_t)Val - (uint64_t)LoVal;
4763c0990c1SCraig Topper   assert(Tmp != 0);
4773c0990c1SCraig Topper 
4783c0990c1SCraig Topper   // Use trailing zero counts to figure how far we need to shift LoVal to line
4793c0990c1SCraig Topper   // up with the remaining constant.
4803c0990c1SCraig Topper   // TODO: This algorithm assumes all non-zero bits in the low 32 bits of the
4813c0990c1SCraig Topper   // final constant come from LoVal.
4823c0990c1SCraig Topper   unsigned TzLo = llvm::countr_zero((uint64_t)LoVal);
4833c0990c1SCraig Topper   unsigned TzHi = llvm::countr_zero(Tmp);
4843c0990c1SCraig Topper   assert(TzLo < 32 && TzHi >= 32);
4853c0990c1SCraig Topper   ShiftAmt = TzHi - TzLo;
4863c0990c1SCraig Topper   AddOpc = RISCV::ADD;
4873c0990c1SCraig Topper 
4883c0990c1SCraig Topper   if (Tmp == ((uint64_t)LoVal << ShiftAmt))
489e179b125SWang Pengcheng     return RISCVMatInt::generateInstSeq(LoVal, STI);
4903c0990c1SCraig Topper 
4913c0990c1SCraig Topper   // If we have Zba, we can use (ADD_UW X, (SLLI X, 32)).
492e179b125SWang Pengcheng   if (STI.hasFeature(RISCV::FeatureStdExtZba) && Lo_32(Val) == Hi_32(Val)) {
4933c0990c1SCraig Topper     ShiftAmt = 32;
4943c0990c1SCraig Topper     AddOpc = RISCV::ADD_UW;
495e179b125SWang Pengcheng     return RISCVMatInt::generateInstSeq(LoVal, STI);
4963c0990c1SCraig Topper   }
4973c0990c1SCraig Topper 
4983c0990c1SCraig Topper   return RISCVMatInt::InstSeq();
4993c0990c1SCraig Topper }
5003c0990c1SCraig Topper 
501e179b125SWang Pengcheng int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI,
502*8687f7cdSAlex Bradbury                   bool CompressionCost, bool FreeZeroes) {
503e179b125SWang Pengcheng   bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
504e179b125SWang Pengcheng   bool HasRVC = CompressionCost && (STI.hasFeature(RISCV::FeatureStdExtC) ||
505e179b125SWang Pengcheng                                     STI.hasFeature(RISCV::FeatureStdExtZca));
506387d3c24SCraig Topper   int PlatRegSize = IsRV64 ? 64 : 32;
507387d3c24SCraig Topper 
508387d3c24SCraig Topper   // Split the constant into platform register sized chunks, and calculate cost
509387d3c24SCraig Topper   // of each chunk.
510387d3c24SCraig Topper   int Cost = 0;
511387d3c24SCraig Topper   for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
512387d3c24SCraig Topper     APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
513*8687f7cdSAlex Bradbury     if (FreeZeroes && Chunk.getSExtValue() == 0)
514*8687f7cdSAlex Bradbury       continue;
515e179b125SWang Pengcheng     InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), STI);
51681efb825SCraig Topper     Cost += getInstSeqCost(MatSeq, HasRVC);
517387d3c24SCraig Topper   }
518*8687f7cdSAlex Bradbury   return std::max(FreeZeroes ? 0 : 1, Cost);
519387d3c24SCraig Topper }
520d2ee2c9cSCraig Topper 
521d2ee2c9cSCraig Topper OpndKind Inst::getOpndKind() const {
522d2ee2c9cSCraig Topper   switch (Opc) {
523d2ee2c9cSCraig Topper   default:
524d2ee2c9cSCraig Topper     llvm_unreachable("Unexpected opcode!");
525d2ee2c9cSCraig Topper   case RISCV::LUI:
526d2ee2c9cSCraig Topper     return RISCVMatInt::Imm;
527d2ee2c9cSCraig Topper   case RISCV::ADD_UW:
528d2ee2c9cSCraig Topper     return RISCVMatInt::RegX0;
529d2ee2c9cSCraig Topper   case RISCV::SH1ADD:
530d2ee2c9cSCraig Topper   case RISCV::SH2ADD:
531d2ee2c9cSCraig Topper   case RISCV::SH3ADD:
532bb106125SCraig Topper   case RISCV::PACK:
533d2ee2c9cSCraig Topper     return RISCVMatInt::RegReg;
534d2ee2c9cSCraig Topper   case RISCV::ADDI:
535d2ee2c9cSCraig Topper   case RISCV::ADDIW:
536cbd45961SCraig Topper   case RISCV::XORI:
537d2ee2c9cSCraig Topper   case RISCV::SLLI:
538d2ee2c9cSCraig Topper   case RISCV::SRLI:
539d2ee2c9cSCraig Topper   case RISCV::SLLI_UW:
540d2ee2c9cSCraig Topper   case RISCV::RORI:
541d2ee2c9cSCraig Topper   case RISCV::BSETI:
542d2ee2c9cSCraig Topper   case RISCV::BCLRI:
543fc02eeb2SPhilipp Tomsich   case RISCV::TH_SRRI:
544d2ee2c9cSCraig Topper     return RISCVMatInt::RegImm;
545d2ee2c9cSCraig Topper   }
546d2ee2c9cSCraig Topper }
547d2ee2c9cSCraig Topper 
5480fe5f03eSjacquesguan } // namespace llvm::RISCVMatInt
549