xref: /llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp (revision af931a51b98f318955fc0bec71f28929e6535619)
//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RISCVMatInt.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
  if (!HasRVC)
    return Res.size();

  int Cost = 0;
  for (auto Instr : Res) {
    bool Compressed;
    switch (Instr.Opc) {
    default:
      llvm_unreachable("Unexpected opcode");
    case RISCV::SLLI:
    case RISCV::SRLI:
      Compressed = true;
      break;
    case RISCV::ADDI:
    case RISCV::ADDIW:
    case RISCV::LUI:
      Compressed = isInt<6>(Instr.Imm);
      break;
    case RISCV::ADDUW:
      Compressed = false;
      break;
    }
    // Two RVC instructions take the same space as one RVI instruction, but
    // can take longer to execute than the single RVI instruction. Thus, we
    // consider that two RVC instructions are slightly more costly than one
    // RVI instruction. For longer sequences of RVC instructions the space
    // savings can be worth it, though. The costs below try to model that.
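    // Illustratively, two compressed instructions then cost 140, which is
    // indeed more than a single uncompressed instruction (100), while three
    // compressed instructions (210) still beat three uncompressed ones (300).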
    if (!Compressed)
      Cost += 100; // Baseline cost of one RVI instruction: 100%.
    else
      Cost += 70; // 70% cost of baseline.
  }
  return Cost;
}

// Recursively generate a sequence for materializing an integer.
static void generateInstSeqImpl(int64_t Val,
                                const FeatureBitset &ActiveFeatures,
                                RISCVMatInt::InstSeq &Res) {
  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];

  if (isInt<32>(Val)) {
    // Depending on the active bits in the immediate Value v, the following
    // instruction sequences are emitted:
    //
    // v == 0                        : ADDI
    // v[0,12) != 0 && v[12,32) == 0 : ADDI
    // v[0,12) == 0 && v[12,32) != 0 : LUI
    // v[0,32) != 0                  : LUI+ADDI(W)
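    //
    // For example, Val == 0x12345fff yields Hi20 == 0x12346 and Lo12 == -1:
    // the +0x800 below rounds Hi20 up so that the negative Lo12 brings the
    // result back down, i.e. LUI 0x12346 followed by ADDI(W) -1.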
    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
    int64_t Lo12 = SignExtend64<12>(Val);

    if (Hi20)
      Res.push_back(RISCVMatInt::Inst(RISCV::LUI, Hi20));

    if (Lo12 || Hi20 == 0) {
      unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
      Res.push_back(RISCVMatInt::Inst(AddiOpc, Lo12));
    }
    return;
  }

  assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");

  // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
  // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
  // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
  // while the following ADDI instructions contribute up to 12 bits each.
  //
  // At first glance, implementing this seems possible by simply emitting the
  // most significant 32 bits (LUI+ADDIW) followed by as many left shifts
  // (SLLI) and immediate additions (ADDI) as needed. However, because ADDI
  // performs a sign-extended addition, that approach would only work when at
  // most 11 bits of each ADDI immediate are used. Using all 12 bits of the
  // ADDI immediates, as GAS does, actually requires that the constant be
  // processed starting from the least significant bit.
  //
  // In the following, constants are processed from LSB to MSB but instruction
  // emission is performed from MSB to LSB by recursively calling
  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
  // from the constant and the optimal shift amount, which can be greater than
  // 12 bits if the constant is sparse, is determined. Then, the shifted
  // remaining constant is processed recursively and gets emitted as soon as it
  // fits into 32 bits. The emission of the shifts and additions is subsequently
  // performed when the recursion returns.
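  //
  // Illustrative trace: for Val == 0x0000010000000001, Lo12 == 1 and the
  // remaining bits, shifted down past their 28 trailing zeros, give
  // ShiftAmount == 40 with Hi52 == 1. The recursion emits ADDI 1, and the
  // unwinding then appends SLLI 40 and ADDI 1, materialising (1 << 40) + 1.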

  int64_t Lo12 = SignExtend64<12>(Val);
  int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
  int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
  Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);

  // If the remaining bits don't fit in 12 bits, we might be able to reduce the
  // shift amount in order to use LUI which will zero the lower 12 bits.
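  // For instance, Val == 0x0000123450000001 would otherwise be emitted as
  // LUI+ADDIW+SLLI 28+ADDI; widening Hi52 to 0x12345000 and shrinking the
  // shift to 16 lets the recursion emit a single LUI, giving
  // LUI 0x12345 + SLLI 16 + ADDI 1.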
  bool Unsigned = false;
  if (ShiftAmount > 12 && !isInt<12>(Hi52)) {
    if (isInt<32>((uint64_t)Hi52 << 12)) {
      // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
      ShiftAmount -= 12;
      Hi52 = (uint64_t)Hi52 << 12;
    } else if (isUInt<32>((uint64_t)Hi52 << 12) &&
               ActiveFeatures[RISCV::FeatureStdExtZba]) {
      // Reduce the shift amount and add zeros to the LSBs so it will match
      // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
      ShiftAmount -= 12;
      Hi52 = ((uint64_t)Hi52 << 12) | (0xffffffffull << 32);
      Unsigned = true;
    }
  }

  // Try to use SLLIUW for Hi52 when it is uint32 but not int32.
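  // For example, with Zba, Val == 0x80000001001 reaches this point with
  // Hi52 == 0x80000001; ORing in the upper ones lets the recursion emit
  // LUI 0x80000 + ADDIW 1, and the SLLI.UW below then zero-extends that back
  // to 0x80000001 before shifting, so ADDI 1 completes the constant.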
  if (isUInt<32>((uint64_t)Hi52) && !isInt<32>((uint64_t)Hi52) &&
      ActiveFeatures[RISCV::FeatureStdExtZba]) {
    // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with SLLIUW.
    Hi52 = ((uint64_t)Hi52) | (0xffffffffull << 32);
    Unsigned = true;
  }

  generateInstSeqImpl(Hi52, ActiveFeatures, Res);

  if (Unsigned)
    Res.push_back(RISCVMatInt::Inst(RISCV::SLLIUW, ShiftAmount));
  else
    Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
  if (Lo12)
    Res.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
}

static unsigned extractRotateInfo(int64_t Val) {
  // for case: 0b111..1..xxxxxx1..1..
  unsigned LeadingOnes = countLeadingOnes((uint64_t)Val);
  unsigned TrailingOnes = countTrailingOnes((uint64_t)Val);
  if (TrailingOnes > 0 && TrailingOnes < 64 &&
      (LeadingOnes + TrailingOnes) > (64 - 12))
    return 64 - TrailingOnes;

  // for case: 0bxxx1..1..1...xxx
  unsigned UpperTrailingOnes = countTrailingOnes(Hi_32(Val));
  unsigned LowerLeadingOnes = countLeadingOnes(Lo_32(Val));
  if (UpperTrailingOnes < 32 &&
      (UpperTrailingOnes + LowerLeadingOnes) > (64 - 12))
    return 32 - UpperTrailingOnes;

  return 0;
}

namespace llvm {
namespace RISCVMatInt {
InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
  RISCVMatInt::InstSeq Res;
  generateInstSeqImpl(Val, ActiveFeatures, Res);

  // If the constant is positive we might be able to generate a shifted constant
  // with no leading zeros and use a final SRLI to restore them.
  if (Val > 0 && Res.size() > 2) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");
    unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
    uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
    // Fill in the bits that will be shifted out with 1s. An example where this
    // helps is trailing one masks with 32 or more ones. This will generate
    // ADDI -1 and an SRLI.
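    // For example, Val == 0xffffffff has 32 leading zeros, and ShiftedVal
    // becomes all-ones, so the whole constant reduces to ADDI -1 + SRLI 32.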
    ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);

    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));

    // Keep the new sequence if it is an improvement.
    if (TmpSeq.size() < Res.size()) {
      Res = TmpSeq;
      // A 2 instruction sequence is the best we can do.
      if (Res.size() <= 2)
        return Res;
    }

    // Some cases can benefit from filling the lower bits with zeros instead.
    ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
    TmpSeq.clear();
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));

    // Keep the new sequence if it is an improvement.
    if (TmpSeq.size() < Res.size()) {
      Res = TmpSeq;
      // A 2 instruction sequence is the best we can do.
      if (Res.size() <= 2)
        return Res;
    }

    // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
    // the end of the sequence.
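    // For example, with Zba, Val == 0xfffffffe becomes -2 once the upper bits
    // are replaced with ones, so ADDI -2 followed by the zext.w below covers
    // it in two instructions.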
    if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
      // Try replacing upper bits with 1.
      uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
      TmpSeq.clear();
      generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDUW, 0));

      // Keep the new sequence if it is an improvement.
      if (TmpSeq.size() < Res.size()) {
        Res = TmpSeq;
        // A 2 instruction sequence is the best we can do.
        if (Res.size() <= 2)
          return Res;
      }
    }
  }

  // Perform optimization with BCLRI/BSETI in the Zbs extension.
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbs]) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");

    // 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
    //    call generateInstSeqImpl with Val|0x80000000 (which is expected to be
    //    an int32), then emit (BCLRI r, 31).
    // 2. For values in range 0x80000000 ~ 0xffffffff, call generateInstSeqImpl
    //    with Val&~0x80000000 (which is expected to be an int32), then
    //    emit (BSETI r, 31).
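    //
    // For example, with Zbs but without Zba, Val == 0x80001001 falls in range
    // 2: materialising 0x1001 takes LUI 0x1 + ADDIW 1, and BSETI 31 then sets
    // bit 31 again, beating the default 4-instruction LUI+ADDIW+SLLI+ADDI.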
    int64_t NewVal;
    unsigned Opc;
    if (Val < 0) {
      Opc = RISCV::BCLRI;
      NewVal = Val | 0x80000000ll;
    } else {
      Opc = RISCV::BSETI;
      NewVal = Val & ~0x80000000ll;
    }
    if (isInt<32>(NewVal)) {
      RISCVMatInt::InstSeq TmpSeq;
      generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(Opc, 31));
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }

    // Try to use BCLRI for upper 32 bits if the original lower 32 bits are
    // negative int32, or use BSETI for upper 32 bits if the original lower
    // 32 bits are positive int32.
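    // For instance, with Zbs, Val == 0x400000001 materialises its low 32 bits
    // with ADDI 1 and then sets bit 34 with BSETI 34, two instructions instead
    // of the default ADDI 1 + SLLI 34 + ADDI 1.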
    int32_t Lo = Val;
    uint32_t Hi = Val >> 32;
    Opc = 0;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq);
    // Check if it is profitable to use BCLRI/BSETI.
    if (Lo > 0 && TmpSeq.size() + countPopulation(Hi) < Res.size()) {
      Opc = RISCV::BSETI;
    } else if (Lo < 0 && TmpSeq.size() + countPopulation(~Hi) < Res.size()) {
      Opc = RISCV::BCLRI;
      Hi = ~Hi;
    }
    // Search for each bit and build corresponding BCLRI/BSETI.
    if (Opc > 0) {
      while (Hi != 0) {
        unsigned Bit = countTrailingZeros(Hi);
        TmpSeq.push_back(RISCVMatInt::Inst(Opc, Bit + 32));
        // Use an unsigned literal so clearing bit 31 never shifts into the
        // sign bit of an int.
        Hi &= ~(1U << Bit);
      }
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }
  }

  // Perform optimization with SH*ADD in the Zba extension.
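  // For example, with Zba, Val == 0x104001003 == 3 * 0x56aab001 takes four
  // instructions by default (LUI+ADDIW+SLLI 12+ADDI); since Val/3 fits in 32
  // bits, this optimization emits LUI 0x56aab + ADDIW 1 + SH1ADD instead.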
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");
    int64_t Div = 0;
    unsigned Opc = 0;
    RISCVMatInt::InstSeq TmpSeq;
    // Select the opcode and divisor.
    if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
      Div = 3;
      Opc = RISCV::SH1ADD;
    } else if ((Val % 5) == 0 && isInt<32>(Val / 5)) {
      Div = 5;
      Opc = RISCV::SH2ADD;
    } else if ((Val % 9) == 0 && isInt<32>(Val / 9)) {
      Div = 9;
      Opc = RISCV::SH3ADD;
    }
    // Build the new instruction sequence.
    if (Div > 0) {
      generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }
    // Try to use LUI+SH*ADD+ADDI.
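    // For instance, with Zba, Val == 0x140005007 rounds to Hi52 == 0x140005000
    // == 5 * 0x40001000, so LUI 0x40001 + SH2ADD + ADDI 7 materialises it in
    // three instructions rather than the default four.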
    int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
    int64_t Lo12 = SignExtend64<12>(Val);
    Div = 0;
    if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
      Div = 3;
      Opc = RISCV::SH1ADD;
    } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
      Div = 5;
      Opc = RISCV::SH2ADD;
    } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
      Div = 9;
      Opc = RISCV::SH3ADD;
    }
    // Build the new instruction sequence.
    if (Div > 0) {
      // A Val with zero Lo12 (i.e., Val equal to Hi52) should already have
      // been handled by the LUI+SH*ADD optimization above.
      assert(Lo12 != 0 &&
             "unexpected instruction sequence for immediate materialisation");
      generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }
  }

  // Perform optimization with rori in the Zbb extension.
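  // For example, with Zbb, Val == 0xfffff0ffffffffff (20 leading and 36
  // trailing ones) is the 12-bit immediate -241 rotated right by 28, so
  // ADDI -241 followed by RORI 28 replaces the default ADDI -15 + SLLI 40 +
  // ADDI -1.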
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbb]) {
    if (unsigned Rotate = extractRotateInfo(Val)) {
      RISCVMatInt::InstSeq TmpSeq;
      uint64_t NegImm12 =
          ((uint64_t)Val >> (64 - Rotate)) | ((uint64_t)Val << Rotate);
      assert(isInt<12>(NegImm12));
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, NegImm12));
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::RORI, Rotate));
      Res = TmpSeq;
    }
  }
  return Res;
}

int getIntMatCost(const APInt &Val, unsigned Size,
                  const FeatureBitset &ActiveFeatures, bool CompressionCost) {
  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
  bool HasRVC = CompressionCost && ActiveFeatures[RISCV::FeatureStdExtC];
  int PlatRegSize = IsRV64 ? 64 : 32;

  // Split the constant into platform-register-sized chunks, and calculate the
  // cost of each chunk.
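  // For example, the cost of an i128 constant on RV64 is the sum of the costs
  // of materialising its low and high 64-bit halves, and never less than 1.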
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
    APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
    InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
    Cost += getInstSeqCost(MatSeq, HasRVC);
  }
  return std::max(1, Cost);
}
} // namespace RISCVMatInt
} // namespace llvm