xref: /llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp (revision dde5546b79f784ab71cac325e0a0698c67c4dcde)
1 //===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the MachineLegalizer class for RISC-V.
10 /// \todo This should be generated by TableGen.
11 //===----------------------------------------------------------------------===//
12 
13 #include "RISCVLegalizerInfo.h"
14 #include "MCTargetDesc/RISCVMatInt.h"
15 #include "RISCVMachineFunctionInfo.h"
16 #include "RISCVSubtarget.h"
17 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
18 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
19 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
20 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
21 #include "llvm/CodeGen/MachineConstantPool.h"
22 #include "llvm/CodeGen/MachineJumpTableInfo.h"
23 #include "llvm/CodeGen/MachineMemOperand.h"
24 #include "llvm/CodeGen/MachineOperand.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/TargetOpcodes.h"
27 #include "llvm/CodeGen/ValueTypes.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Type.h"
30 
31 using namespace llvm;
32 using namespace LegalityPredicates;
33 using namespace LegalizeMutations;
34 
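// Predicate used below: a type from IntOrFPVecTys is only considered legal
// when the subtarget can support it, i.e. the vector extension is enabled,
// 64-bit elements additionally require i64 vector support, and
// single-element (nxv1) types additionally require ELEN == 64.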
35 static LegalityPredicate
36 typeIsLegalIntOrFPVec(unsigned TypeIdx,
37                       std::initializer_list<LLT> IntOrFPVecTys,
38                       const RISCVSubtarget &ST) {
39   LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
40     return ST.hasVInstructions() &&
41            (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
42             ST.hasVInstructionsI64()) &&
43            (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
44             ST.getELen() == 64);
45   };
46 
47   return all(typeInSet(TypeIdx, IntOrFPVecTys), P);
48 }
49 
50 static LegalityPredicate
51 typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
52                    const RISCVSubtarget &ST) {
53   LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
54     return ST.hasVInstructions() &&
55            (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
56             ST.getELen() == 64);
57   };
58   return all(typeInSet(TypeIdx, BoolVecTys), P);
59 }
60 
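// Pointer vectors follow the same subtarget checks as the predicates above,
// with the extra restriction that 16-element pointer vectors are only
// allowed when the pointer (scalar) size is 32 bits.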
61 static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx,
62                                            std::initializer_list<LLT> PtrVecTys,
63                                            const RISCVSubtarget &ST) {
64   LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
65     return ST.hasVInstructions() &&
66            (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
67             ST.getELen() == 64) &&
68            (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 16 ||
69             Query.Types[TypeIdx].getScalarSizeInBits() == 32);
70   };
71   return all(typeInSet(TypeIdx, PtrVecTys), P);
72 }
73 
74 RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
75     : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
76   const LLT sDoubleXLen = LLT::scalar(2 * XLen);
77   const LLT p0 = LLT::pointer(0, XLen);
78   const LLT s1 = LLT::scalar(1);
79   const LLT s8 = LLT::scalar(8);
80   const LLT s16 = LLT::scalar(16);
81   const LLT s32 = LLT::scalar(32);
82   const LLT s64 = LLT::scalar(64);
83   const LLT s128 = LLT::scalar(128);
84 
85   const LLT nxv1s1 = LLT::scalable_vector(1, s1);
86   const LLT nxv2s1 = LLT::scalable_vector(2, s1);
87   const LLT nxv4s1 = LLT::scalable_vector(4, s1);
88   const LLT nxv8s1 = LLT::scalable_vector(8, s1);
89   const LLT nxv16s1 = LLT::scalable_vector(16, s1);
90   const LLT nxv32s1 = LLT::scalable_vector(32, s1);
91   const LLT nxv64s1 = LLT::scalable_vector(64, s1);
92 
93   const LLT nxv1s8 = LLT::scalable_vector(1, s8);
94   const LLT nxv2s8 = LLT::scalable_vector(2, s8);
95   const LLT nxv4s8 = LLT::scalable_vector(4, s8);
96   const LLT nxv8s8 = LLT::scalable_vector(8, s8);
97   const LLT nxv16s8 = LLT::scalable_vector(16, s8);
98   const LLT nxv32s8 = LLT::scalable_vector(32, s8);
99   const LLT nxv64s8 = LLT::scalable_vector(64, s8);
100 
101   const LLT nxv1s16 = LLT::scalable_vector(1, s16);
102   const LLT nxv2s16 = LLT::scalable_vector(2, s16);
103   const LLT nxv4s16 = LLT::scalable_vector(4, s16);
104   const LLT nxv8s16 = LLT::scalable_vector(8, s16);
105   const LLT nxv16s16 = LLT::scalable_vector(16, s16);
106   const LLT nxv32s16 = LLT::scalable_vector(32, s16);
107 
108   const LLT nxv1s32 = LLT::scalable_vector(1, s32);
109   const LLT nxv2s32 = LLT::scalable_vector(2, s32);
110   const LLT nxv4s32 = LLT::scalable_vector(4, s32);
111   const LLT nxv8s32 = LLT::scalable_vector(8, s32);
112   const LLT nxv16s32 = LLT::scalable_vector(16, s32);
113 
114   const LLT nxv1s64 = LLT::scalable_vector(1, s64);
115   const LLT nxv2s64 = LLT::scalable_vector(2, s64);
116   const LLT nxv4s64 = LLT::scalable_vector(4, s64);
117   const LLT nxv8s64 = LLT::scalable_vector(8, s64);
118 
119   const LLT nxv1p0 = LLT::scalable_vector(1, p0);
120   const LLT nxv2p0 = LLT::scalable_vector(2, p0);
121   const LLT nxv4p0 = LLT::scalable_vector(4, p0);
122   const LLT nxv8p0 = LLT::scalable_vector(8, p0);
123   const LLT nxv16p0 = LLT::scalable_vector(16, p0);
124 
125   using namespace TargetOpcode;
126 
127   auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};
128 
129   auto IntOrFPVecTys = {nxv1s8,   nxv2s8,  nxv4s8,  nxv8s8,  nxv16s8, nxv32s8,
130                         nxv64s8,  nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
131                         nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
132                         nxv1s64,  nxv2s64, nxv4s64, nxv8s64};
133 
134   auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};
135 
136   getActionDefinitionsBuilder({G_ADD, G_SUB})
137       .legalFor({sXLen})
138       .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
139       .customFor(ST.is64Bit(), {s32})
140       .widenScalarToNextPow2(0)
141       .clampScalar(0, sXLen, sXLen);
142 
143   getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
144       .legalFor({sXLen})
145       .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
146       .widenScalarToNextPow2(0)
147       .clampScalar(0, sXLen, sXLen);
148 
149   getActionDefinitionsBuilder(
150       {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();
151 
152   getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();
153 
154   // TODO: Use Vector Single-Width Saturating Instructions for vector types.
155   getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
156       .lower();
157 
158   getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
159       .legalFor({{sXLen, sXLen}})
160       .customFor(ST.is64Bit(), {{s32, s32}})
161       .widenScalarToNextPow2(0)
162       .clampScalar(1, sXLen, sXLen)
163       .clampScalar(0, sXLen, sXLen);
164 
165   getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
166       .legalFor({{s32, s16}})
167       .legalFor(ST.is64Bit(), {{s64, s16}, {s64, s32}})
168       .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
169                    typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
170       .customIf(typeIsLegalBoolVec(1, BoolVecTys, ST))
171       .maxScalar(0, sXLen);
172 
173   getActionDefinitionsBuilder(G_SEXT_INREG)
174       .customFor({sXLen})
175       .clampScalar(0, sXLen, sXLen)
176       .lower();
177 
178   // Merge/Unmerge
179   for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
180     auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
181     unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
182     unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
183     if (XLen == 32 && ST.hasStdExtD()) {
184       MergeUnmergeActions.legalIf(
185           all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32)));
186     }
187     MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
188         .widenScalarToNextPow2(BigTyIdx, XLen)
189         .clampScalar(LitTyIdx, sXLen, sXLen)
190         .clampScalar(BigTyIdx, sXLen, sXLen);
191   }
192 
193   getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
194 
195   getActionDefinitionsBuilder({G_ROTR, G_ROTL})
196       .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), {{sXLen, sXLen}})
197       .customFor(ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()),
198                  {{s32, s32}})
199       .lower();
200 
201   getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();
202 
203   getActionDefinitionsBuilder(G_BITCAST).legalIf(
204       all(LegalityPredicates::any(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
205                                   typeIsLegalBoolVec(0, BoolVecTys, ST)),
206           LegalityPredicates::any(typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST),
207                                   typeIsLegalBoolVec(1, BoolVecTys, ST))));
208 
209   auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
210   if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
211     BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
212   else
213     BSWAPActions.maxScalar(0, sXLen).lower();
214 
215   auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
216   auto &CountZerosUndefActions =
217       getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
218   if (ST.hasStdExtZbb()) {
219     CountZerosActions.legalFor({{sXLen, sXLen}})
220         .customFor({{s32, s32}})
221         .clampScalar(0, s32, sXLen)
222         .widenScalarToNextPow2(0)
223         .scalarSameSizeAs(1, 0);
224   } else {
225     CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
226     CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
227   }
228   CountZerosUndefActions.lower();
229 
230   auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
231   if (ST.hasStdExtZbb()) {
232     CTPOPActions.legalFor({{sXLen, sXLen}})
233         .clampScalar(0, sXLen, sXLen)
234         .scalarSameSizeAs(1, 0);
235   } else {
236     CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
237   }
238 
239   getActionDefinitionsBuilder(G_CONSTANT)
240       .legalFor({p0})
241       .legalFor(!ST.is64Bit(), {s32})
242       .customFor(ST.is64Bit(), {s64})
243       .widenScalarToNextPow2(0)
244       .clampScalar(0, sXLen, sXLen);
245 
246   // TODO: Transform illegal vector types into legal vector types.
247   getActionDefinitionsBuilder(G_FREEZE)
248       .legalFor({s16, s32, p0})
249       .legalFor(ST.is64Bit(), {s64})
250       .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
251       .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
252       .widenScalarToNextPow2(0)
253       .clampScalar(0, s16, sXLen);
254 
255   // TODO: Transform illegal vector types into legal vector types.
256   // TODO: Merge with G_FREEZE?
257   getActionDefinitionsBuilder(
258       {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
259       .legalFor({s32, sXLen, p0})
260       .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
261       .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
262       .widenScalarToNextPow2(0)
263       .clampScalar(0, s32, sXLen);
264 
265   getActionDefinitionsBuilder(G_ICMP)
266       .legalFor({{sXLen, sXLen}, {sXLen, p0}})
267       .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
268                    typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
269       .widenScalarOrEltToNextPow2OrMinSize(1, 8)
270       .clampScalar(1, sXLen, sXLen)
271       .clampScalar(0, sXLen, sXLen);
272 
273   getActionDefinitionsBuilder(G_SELECT)
274       .legalFor({{s32, sXLen}, {p0, sXLen}})
275       .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
276                    typeIsLegalBoolVec(1, BoolVecTys, ST)))
277       .legalFor(XLen == 64 || ST.hasStdExtD(), {{s64, sXLen}})
278       .widenScalarToNextPow2(0)
279       .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
280       .clampScalar(1, sXLen, sXLen);
281 
282   auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
283   auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
284   auto &ExtLoadActions = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD});
285 
286   // Return the alignment needed for scalar memory ops. If unaligned scalar mem
287   // is supported, we only require byte alignment. Otherwise, we need the memory
288   // op to be natively aligned.
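  // For example, getScalarMemAlign(32) returns 8 (byte alignment) when
  // unaligned scalar accesses are enabled and 32 (natural alignment)
  // otherwise.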
289   auto getScalarMemAlign = [&ST](unsigned Size) {
290     return ST.enableUnalignedScalarMem() ? 8 : Size;
291   };
292 
293   LoadActions.legalForTypesWithMemDesc(
294       {{s16, p0, s8, getScalarMemAlign(8)},
295        {s32, p0, s8, getScalarMemAlign(8)},
296        {s16, p0, s16, getScalarMemAlign(16)},
297        {s32, p0, s16, getScalarMemAlign(16)},
298        {s32, p0, s32, getScalarMemAlign(32)},
299        {p0, p0, sXLen, getScalarMemAlign(XLen)}});
300   StoreActions.legalForTypesWithMemDesc(
301       {{s16, p0, s8, getScalarMemAlign(8)},
302        {s32, p0, s8, getScalarMemAlign(8)},
303        {s16, p0, s16, getScalarMemAlign(16)},
304        {s32, p0, s16, getScalarMemAlign(16)},
305        {s32, p0, s32, getScalarMemAlign(32)},
306        {p0, p0, sXLen, getScalarMemAlign(XLen)}});
307   ExtLoadActions.legalForTypesWithMemDesc(
308       {{sXLen, p0, s8, getScalarMemAlign(8)},
309        {sXLen, p0, s16, getScalarMemAlign(16)}});
310   if (XLen == 64) {
311     LoadActions.legalForTypesWithMemDesc(
312         {{s64, p0, s8, getScalarMemAlign(8)},
313          {s64, p0, s16, getScalarMemAlign(16)},
314          {s64, p0, s32, getScalarMemAlign(32)},
315          {s64, p0, s64, getScalarMemAlign(64)}});
316     StoreActions.legalForTypesWithMemDesc(
317         {{s64, p0, s8, getScalarMemAlign(8)},
318          {s64, p0, s16, getScalarMemAlign(16)},
319          {s64, p0, s32, getScalarMemAlign(32)},
320          {s64, p0, s64, getScalarMemAlign(64)}});
321     ExtLoadActions.legalForTypesWithMemDesc(
322         {{s64, p0, s32, getScalarMemAlign(32)}});
323   } else if (ST.hasStdExtD()) {
324     LoadActions.legalForTypesWithMemDesc(
325         {{s64, p0, s64, getScalarMemAlign(64)}});
326     StoreActions.legalForTypesWithMemDesc(
327         {{s64, p0, s64, getScalarMemAlign(64)}});
328   }
329 
330   // Vector loads/stores.
331   if (ST.hasVInstructions()) {
332     LoadActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
333                                           {nxv4s8, p0, nxv4s8, 8},
334                                           {nxv8s8, p0, nxv8s8, 8},
335                                           {nxv16s8, p0, nxv16s8, 8},
336                                           {nxv32s8, p0, nxv32s8, 8},
337                                           {nxv64s8, p0, nxv64s8, 8},
338                                           {nxv2s16, p0, nxv2s16, 16},
339                                           {nxv4s16, p0, nxv4s16, 16},
340                                           {nxv8s16, p0, nxv8s16, 16},
341                                           {nxv16s16, p0, nxv16s16, 16},
342                                           {nxv32s16, p0, nxv32s16, 16},
343                                           {nxv2s32, p0, nxv2s32, 32},
344                                           {nxv4s32, p0, nxv4s32, 32},
345                                           {nxv8s32, p0, nxv8s32, 32},
346                                           {nxv16s32, p0, nxv16s32, 32}});
347     StoreActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
348                                            {nxv4s8, p0, nxv4s8, 8},
349                                            {nxv8s8, p0, nxv8s8, 8},
350                                            {nxv16s8, p0, nxv16s8, 8},
351                                            {nxv32s8, p0, nxv32s8, 8},
352                                            {nxv64s8, p0, nxv64s8, 8},
353                                            {nxv2s16, p0, nxv2s16, 16},
354                                            {nxv4s16, p0, nxv4s16, 16},
355                                            {nxv8s16, p0, nxv8s16, 16},
356                                            {nxv16s16, p0, nxv16s16, 16},
357                                            {nxv32s16, p0, nxv32s16, 16},
358                                            {nxv2s32, p0, nxv2s32, 32},
359                                            {nxv4s32, p0, nxv4s32, 32},
360                                            {nxv8s32, p0, nxv8s32, 32},
361                                            {nxv16s32, p0, nxv16s32, 32}});
362 
363     if (ST.getELen() == 64) {
364       LoadActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
365                                             {nxv1s16, p0, nxv1s16, 16},
366                                             {nxv1s32, p0, nxv1s32, 32}});
367       StoreActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
368                                              {nxv1s16, p0, nxv1s16, 16},
369                                              {nxv1s32, p0, nxv1s32, 32}});
370     }
371 
372     if (ST.hasVInstructionsI64()) {
373       LoadActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
374                                             {nxv2s64, p0, nxv2s64, 64},
375                                             {nxv4s64, p0, nxv4s64, 64},
376                                             {nxv8s64, p0, nxv8s64, 64}});
377       StoreActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
378                                              {nxv2s64, p0, nxv2s64, 64},
379                                              {nxv4s64, p0, nxv4s64, 64},
380                                              {nxv8s64, p0, nxv8s64, 64}});
381     }
382 
383     // We use the custom lowering path for scalable vector types with
384     // non-standard alignments.
385     LoadActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
386     StoreActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
387 
388     // Pointers require that XLen sized elements are legal.
389     if (XLen <= ST.getELen()) {
390       LoadActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
391       StoreActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
392     }
393   }
394 
395   LoadActions.widenScalarToNextPow2(0, /* MinSize = */ 8)
396       .lowerIfMemSizeNotByteSizePow2()
397       .clampScalar(0, s16, sXLen)
398       .lower();
399   StoreActions
400       .clampScalar(0, s16, sXLen)
401       .lowerIfMemSizeNotByteSizePow2()
402       .lower();
403 
404   ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, sXLen, sXLen).lower();
405 
406   getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});
407 
408   getActionDefinitionsBuilder(G_PTRTOINT)
409       .legalFor({{sXLen, p0}})
410       .clampScalar(0, sXLen, sXLen);
411 
412   getActionDefinitionsBuilder(G_INTTOPTR)
413       .legalFor({{p0, sXLen}})
414       .clampScalar(1, sXLen, sXLen);
415 
416   getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);
417 
418   getActionDefinitionsBuilder(G_BRJT).customFor({{p0, sXLen}});
419 
420   getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
421 
422   getActionDefinitionsBuilder(G_PHI)
423       .legalFor({p0, s32, sXLen})
424       .widenScalarToNextPow2(0)
425       .clampScalar(0, s32, sXLen);
426 
427   getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
428       .legalFor({p0});
429 
430   if (ST.hasStdExtZmmul()) {
431     getActionDefinitionsBuilder(G_MUL)
432         .legalFor({sXLen})
433         .widenScalarToNextPow2(0)
434         .clampScalar(0, sXLen, sXLen);
435 
436     // clang-format off
437     getActionDefinitionsBuilder({G_SMULH, G_UMULH})
438         .legalFor({sXLen})
439         .lower();
440     // clang-format on
441 
442     getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
443   } else {
444     getActionDefinitionsBuilder(G_MUL)
445         .libcallFor({sXLen, sDoubleXLen})
446         .widenScalarToNextPow2(0)
447         .clampScalar(0, sXLen, sDoubleXLen);
448 
449     getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});
450 
451     getActionDefinitionsBuilder({G_SMULO, G_UMULO})
452         .minScalar(0, sXLen)
453         // Widen sXLen to sDoubleXLen so we can use a single libcall to get
454         // the low bits for the mul result and high bits to do the overflow
455         // check.
456         .widenScalarIf(typeIs(0, sXLen),
457                        LegalizeMutations::changeTo(0, sDoubleXLen))
458         .lower();
459   }
460 
461   if (ST.hasStdExtM()) {
462     getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_UREM})
463         .legalFor({sXLen})
464         .customFor({s32})
465         .libcallFor({sDoubleXLen})
466         .clampScalar(0, s32, sDoubleXLen)
467         .widenScalarToNextPow2(0);
468     getActionDefinitionsBuilder(G_SREM)
469         .legalFor({sXLen})
470         .libcallFor({sDoubleXLen})
471         .clampScalar(0, sXLen, sDoubleXLen)
472         .widenScalarToNextPow2(0);
473   } else {
474     getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
475         .libcallFor({sXLen, sDoubleXLen})
476         .clampScalar(0, sXLen, sDoubleXLen)
477         .widenScalarToNextPow2(0);
478   }
479 
480   // TODO: Use libcall for sDoubleXLen.
481   getActionDefinitionsBuilder({G_SDIVREM, G_UDIVREM}).lower();
482 
483   getActionDefinitionsBuilder(G_ABS)
484       .customFor(ST.hasStdExtZbb(), {sXLen})
485       .minScalar(ST.hasStdExtZbb(), 0, sXLen)
486       .lower();
487 
488   getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN})
489       .legalFor(ST.hasStdExtZbb(), {sXLen})
490       .minScalar(ST.hasStdExtZbb(), 0, sXLen)
491       .lower();
492 
493   getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
494 
495   getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
496 
497   getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
498 
499   getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
500       .lower();
501 
502   // FP Operations
503 
504   // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
505   getActionDefinitionsBuilder(
506       {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
507       .legalFor(ST.hasStdExtF(), {s32})
508       .legalFor(ST.hasStdExtD(), {s64})
509       .legalFor(ST.hasStdExtZfh(), {s16})
510       .libcallFor({s32, s64})
511       .libcallFor(ST.is64Bit(), {s128});
512 
513   getActionDefinitionsBuilder({G_FNEG, G_FABS})
514       .legalFor(ST.hasStdExtF(), {s32})
515       .legalFor(ST.hasStdExtD(), {s64})
516       .legalFor(ST.hasStdExtZfh(), {s16})
517       .lowerFor({s32, s64, s128});
518 
519   getActionDefinitionsBuilder(G_FREM)
520       .libcallFor({s32, s64})
521       .libcallFor(ST.is64Bit(), {s128})
522       .minScalar(0, s32)
523       .scalarize(0);
524 
525   getActionDefinitionsBuilder(G_FCOPYSIGN)
526       .legalFor(ST.hasStdExtF(), {{s32, s32}})
527       .legalFor(ST.hasStdExtD(), {{s64, s64}, {s32, s64}, {s64, s32}})
528       .legalFor(ST.hasStdExtZfh(), {{s16, s16}, {s16, s32}, {s32, s16}})
529       .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}})
530       .lower();
531 
532   // FIXME: Use Zfhmin.
533   getActionDefinitionsBuilder(G_FPTRUNC)
534       .legalFor(ST.hasStdExtD(), {{s32, s64}})
535       .legalFor(ST.hasStdExtZfh(), {{s16, s32}})
536       .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}})
537       .libcallFor({{s32, s64}})
538       .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}});
539   getActionDefinitionsBuilder(G_FPEXT)
540       .legalFor(ST.hasStdExtD(), {{s64, s32}})
541       .legalFor(ST.hasStdExtZfh(), {{s32, s16}})
542       .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}})
543       .libcallFor({{s64, s32}})
544       .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}});
545 
546   getActionDefinitionsBuilder(G_FCMP)
547       .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
548       .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
549       .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
550       .clampScalar(0, sXLen, sXLen)
551       .libcallFor({{sXLen, s32}, {sXLen, s64}})
552       .libcallFor(ST.is64Bit(), {{sXLen, s128}});
553 
554   // TODO: Support vector version of G_IS_FPCLASS.
555   getActionDefinitionsBuilder(G_IS_FPCLASS)
556       .customFor(ST.hasStdExtF(), {{s1, s32}})
557       .customFor(ST.hasStdExtD(), {{s1, s64}})
558       .customFor(ST.hasStdExtZfh(), {{s1, s16}})
559       .lowerFor({{s1, s32}, {s1, s64}});
560 
561   getActionDefinitionsBuilder(G_FCONSTANT)
562       .legalFor(ST.hasStdExtF(), {s32})
563       .legalFor(ST.hasStdExtD(), {s64})
564       .legalFor(ST.hasStdExtZfh(), {s16})
565       .lowerFor({s32, s64, s128});
566 
567   getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
568       .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
569       .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
570       .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
571       .customFor(ST.is64Bit() && ST.hasStdExtF(), {{s32, s32}})
572       .customFor(ST.is64Bit() && ST.hasStdExtD(), {{s32, s64}})
573       .customFor(ST.is64Bit() && ST.hasStdExtZfh(), {{s32, s16}})
574       .widenScalarToNextPow2(0)
575       .minScalar(0, s32)
576       .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
577       .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}) // FIXME RV32.
578       .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}, {s128, s128}});
579 
580   getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
581       .legalFor(ST.hasStdExtF(), {{s32, sXLen}})
582       .legalFor(ST.hasStdExtD(), {{s64, sXLen}})
583       .legalFor(ST.hasStdExtZfh(), {{s16, sXLen}})
584       .widenScalarToNextPow2(1)
585       // Promote to XLen if the operation is legal.
586       .widenScalarIf(
587           [=, &ST](const LegalityQuery &Query) {
588             return Query.Types[0].isScalar() && Query.Types[1].isScalar() &&
589                    (Query.Types[1].getSizeInBits() < ST.getXLen()) &&
590                    ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) ||
591                     (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) ||
592                     (ST.hasStdExtZfh() &&
593                      Query.Types[0].getSizeInBits() == 16));
594           },
595           LegalizeMutations::changeTo(1, sXLen))
596       // Otherwise only promote to s32 since we have si libcalls.
597       .minScalar(1, s32)
598       .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
599       .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}) // FIXME RV32.
600       .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}, {s128, s128}});
601 
602   // FIXME: We can do custom inline expansion like SelectionDAG.
603   getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
604                                G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
605                                G_INTRINSIC_ROUNDEVEN})
606       .legalFor(ST.hasStdExtZfa(), {s32})
607       .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
608       .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16})
609       .libcallFor({s32, s64})
610       .libcallFor(ST.is64Bit(), {s128});
611 
612   getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
613       .legalFor(ST.hasStdExtZfa(), {s32})
614       .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
615       .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16});
616 
617   getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
618                                G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
619                                G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
620                                G_FTANH})
621       .libcallFor({s32, s64})
622       .libcallFor(ST.is64Bit(), {s128});
623   getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
624       .libcallFor({{s32, s32}, {s64, s32}})
625       .libcallFor(ST.is64Bit(), {s128, s32});
626 
627   getActionDefinitionsBuilder(G_VASTART).customFor({p0});
628 
629   // va_list must be a pointer, but most sized types are pretty easy to handle
630   // as the destination.
631   getActionDefinitionsBuilder(G_VAARG)
632       // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
633       // other than sXLen.
634       .clampScalar(0, sXLen, sXLen)
635       .lowerForCartesianProduct({sXLen, p0}, {p0});
636 
637   getActionDefinitionsBuilder(G_VSCALE)
638       .clampScalar(0, sXLen, sXLen)
639       .customFor({sXLen});
640 
641   auto &SplatActions =
642       getActionDefinitionsBuilder(G_SPLAT_VECTOR)
643           .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
644                        typeIs(1, sXLen)))
645           .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1)));
646   // Handle case of s64 element vectors on RV32. If the subtarget does not have
647   // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
648   // does have f64, then we don't know whether the type is an f64 or an i64,
649   // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
650   // depending on how the instructions it consumes are legalized. They are not
651   // depending on how the instructions that consume it are legalized. They are not
652   // make the decision at this moment.
653   if (XLen == 32) {
654     if (ST.hasVInstructionsF64() && ST.hasStdExtD())
655       SplatActions.legalIf(all(
656           typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
657     else if (ST.hasVInstructionsI64())
658       SplatActions.customIf(all(
659           typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
660   }
661 
662   SplatActions.clampScalar(1, sXLen, sXLen);
663 
664   LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
665     LLT DstTy = Query.Types[0];
666     LLT SrcTy = Query.Types[1];
667     return DstTy.getElementType() == LLT::scalar(1) &&
668            DstTy.getElementCount().getKnownMinValue() >= 8 &&
669            SrcTy.getElementCount().getKnownMinValue() >= 8;
670   };
671   getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
672       // We don't have the ability to slide mask vectors down indexed by their
673       // i1 elements; the smallest we can do is i8. Often we are able to bitcast
674       // to equivalent i8 vectors.
675       .bitcastIf(
676           all(typeIsLegalBoolVec(0, BoolVecTys, ST),
677               typeIsLegalBoolVec(1, BoolVecTys, ST), ExtractSubvecBitcastPred),
678           [=](const LegalityQuery &Query) {
679             LLT CastTy = LLT::vector(
680                 Query.Types[0].getElementCount().divideCoefficientBy(8), 8);
681             return std::pair(0, CastTy);
682           })
683       .customIf(LegalityPredicates::any(
684           all(typeIsLegalBoolVec(0, BoolVecTys, ST),
685               typeIsLegalBoolVec(1, BoolVecTys, ST)),
686           all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
687               typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))));
688 
689   getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
690       .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
691                     typeIsLegalBoolVec(1, BoolVecTys, ST)))
692       .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
693                     typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
694 
695   getLegacyLegalizerInfo().computeTables();
696   verify(*ST.getInstrInfo());
697 }
698 
699 bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
700                                            MachineInstr &MI) const {
701   Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
702   switch (IntrinsicID) {
703   default:
704     return false;
705   case Intrinsic::vacopy: {
706     // vacopy arguments must be legal because of the intrinsic signature.
707     // No need to check here.
708 
709     MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
710     MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
711     MachineFunction &MF = *MI.getMF();
712     const DataLayout &DL = MIRBuilder.getDataLayout();
713     LLVMContext &Ctx = MF.getFunction().getContext();
714 
715     Register DstLst = MI.getOperand(1).getReg();
716     LLT PtrTy = MRI.getType(DstLst);
717 
718     // Load the source va_list
719     Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
720     MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
721         MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment);
722     auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO);
723 
724     // Store the result in the destination va_list
725     MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
726         MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, Alignment);
727     MIRBuilder.buildStore(Tmp, DstLst, *StoreMMO);
728 
729     MI.eraseFromParent();
730     return true;
731   }
732   }
733 }
734 
735 bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
736                                          MachineIRBuilder &MIRBuilder) const {
737   // Stores the address of the VarArgsFrameIndex slot into the memory location
738   assert(MI.getOpcode() == TargetOpcode::G_VASTART);
739   MachineFunction *MF = MI.getParent()->getParent();
740   RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
741   int FI = FuncInfo->getVarArgsFrameIndex();
742   LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg());
743   auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI);
744   assert(MI.hasOneMemOperand());
745   MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(),
746                         *MI.memoperands()[0]);
747   MI.eraseFromParent();
748   return true;
749 }
750 
751 bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI,
752                                       MachineIRBuilder &MIRBuilder) const {
753   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
754   auto &MF = *MI.getParent()->getParent();
755   const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
756   unsigned EntrySize = MJTI->getEntrySize(MF.getDataLayout());
757 
758   Register PtrReg = MI.getOperand(0).getReg();
759   LLT PtrTy = MRI.getType(PtrReg);
760   Register IndexReg = MI.getOperand(2).getReg();
761   LLT IndexTy = MRI.getType(IndexReg);
762 
763   if (!isPowerOf2_32(EntrySize))
764     return false;
765 
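  // Scale the index by the jump table entry size (a power of two); e.g. with
  // 4-byte entries the index is shifted left by 2.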
766   auto ShiftAmt = MIRBuilder.buildConstant(IndexTy, Log2_32(EntrySize));
767   IndexReg = MIRBuilder.buildShl(IndexTy, IndexReg, ShiftAmt).getReg(0);
768 
769   auto Addr = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, IndexReg);
770 
771   MachineMemOperand *MMO = MF.getMachineMemOperand(
772       MachinePointerInfo::getJumpTable(MF), MachineMemOperand::MOLoad,
773       EntrySize, Align(MJTI->getEntryAlignment(MF.getDataLayout())));
774 
775   Register TargetReg;
776   switch (MJTI->getEntryKind()) {
777   default:
778     return false;
779   case MachineJumpTableInfo::EK_LabelDifference32: {
780     // For PIC, the sequence is:
781     // BRIND(load(Jumptable + index) + RelocBase)
782     // RelocBase can be JumpTable, GOT or some sort of global base.
783     unsigned LoadOpc =
784         STI.is64Bit() ? TargetOpcode::G_SEXTLOAD : TargetOpcode::G_LOAD;
785     auto Load = MIRBuilder.buildLoadInstr(LoadOpc, IndexTy, Addr, *MMO);
786     TargetReg = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, Load).getReg(0);
787     break;
788   }
789   case MachineJumpTableInfo::EK_Custom32: {
790     auto Load = MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, IndexTy,
791                                           Addr, *MMO);
792     TargetReg = MIRBuilder.buildIntToPtr(PtrTy, Load).getReg(0);
793     break;
794   }
795   case MachineJumpTableInfo::EK_BlockAddress:
796     TargetReg = MIRBuilder.buildLoad(PtrTy, Addr, *MMO).getReg(0);
797     break;
798   }
799 
800   MIRBuilder.buildBrIndirect(TargetReg);
801 
802   MI.eraseFromParent();
803   return true;
804 }
805 
806 bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm,
807                                                 bool ShouldOptForSize) const {
808   assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
809   int64_t Imm = APImm.getSExtValue();
810   // All simm32 constants should be handled by isel.
811   // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
812   // this check redundant, but small immediates are common so this check
813   // this check redundant, but small immediates are common, so keeping this
814   // check improves compile time.
815     return false;
816 
817   // We only need to cost the immediate, if constant pool lowering is enabled.
818   if (!STI.useConstantPoolForLargeInts())
819     return false;
820 
821   RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
822   if (Seq.size() <= STI.getMaxBuildIntsCost())
823     return false;
824 
825   // Optimizations below are disabled for opt size. If we're optimizing for
826   // size, use a constant pool.
827   if (ShouldOptForSize)
828     return true;
829   //
830   // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
831   // do that if it will avoid a constant pool.
832   // It will require an extra temporary register though.
833   // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
834   // low and high 32 bits are the same and bit 31 and 63 are set.
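  // For example, an immediate such as 0x0101010101010101 could be built as
  // (ADD (SLLI X, 32), X) with X = 0x01010101, at the cost of the extra
  // temporary register.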
835   unsigned ShiftAmt, AddOpc;
836   RISCVMatInt::InstSeq SeqLo =
837       RISCVMatInt::generateTwoRegInstSeq(Imm, STI, ShiftAmt, AddOpc);
838   return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
839 }
840 
841 bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
842                                         MachineIRBuilder &MIB) const {
843   const LLT XLenTy(STI.getXLenVT());
844   Register Dst = MI.getOperand(0).getReg();
845 
846   // We define our scalable vector types for lmul=1 to use a 64 bit known
847   // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
848   // vscale as VLENB / 8.
849   static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
850   if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
851     // Support for VLEN==32 is incomplete.
852     return false;
853 
854   // We assume VLENB is a multiple of 8. We manually choose the best shift
855   // here because SimplifyDemandedBits isn't always able to simplify it.
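  // For example, a multiplier of 2 (Log2 == 1) becomes VLENB >> 2, a
  // multiplier of 8 (Log2 == 3) is VLENB itself, and a multiplier of 16
  // (Log2 == 4) becomes VLENB << 1.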
856   uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
857   if (isPowerOf2_64(Val)) {
858     uint64_t Log2 = Log2_64(Val);
859     if (Log2 < 3) {
860       auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
861       MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
862     } else if (Log2 > 3) {
863       auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
864       MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
865     } else {
866       MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
867     }
868   } else if ((Val % 8) == 0) {
869     // If the multiplier is a multiple of 8, scale it down to avoid needing
870     // to shift the VLENB value.
871     auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
872     MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
873   } else {
874     auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
875     auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
876     MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
877   }
878   MI.eraseFromParent();
879   return true;
880 }
881 
882 // Custom-lower extensions from mask vectors by using a vselect either with 1
883 // for zero/any-extension or -1 for sign-extension:
884 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
885 // Note that any-extension is lowered identically to zero-extension.
886 bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
887                                      MachineIRBuilder &MIB) const {
888 
889   unsigned Opc = MI.getOpcode();
890   assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
891          Opc == TargetOpcode::G_ANYEXT);
892 
893   MachineRegisterInfo &MRI = *MIB.getMRI();
894   Register Dst = MI.getOperand(0).getReg();
895   Register Src = MI.getOperand(1).getReg();
896 
897   LLT DstTy = MRI.getType(Dst);
898   int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
899   LLT DstEltTy = DstTy.getElementType();
900   auto SplatZero = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, 0));
901   auto SplatTrue =
902       MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, ExtTrueVal));
903   MIB.buildSelect(Dst, Src, SplatTrue, SplatZero);
904 
905   MI.eraseFromParent();
906   return true;
907 }
908 
909 bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
910                                            LegalizerHelper &Helper,
911                                            MachineIRBuilder &MIB) const {
912   assert((isa<GLoad>(MI) || isa<GStore>(MI)) &&
913          "Machine instructions must be Load/Store.");
914   MachineRegisterInfo &MRI = *MIB.getMRI();
915   MachineFunction *MF = MI.getMF();
916   const DataLayout &DL = MIB.getDataLayout();
917   LLVMContext &Ctx = MF->getFunction().getContext();
918 
919   Register DstReg = MI.getOperand(0).getReg();
920   LLT DataTy = MRI.getType(DstReg);
921   if (!DataTy.isVector())
922     return false;
923 
924   if (!MI.hasOneMemOperand())
925     return false;
926 
927   MachineMemOperand *MMO = *MI.memoperands_begin();
928 
929   const auto *TLI = STI.getTargetLowering();
930   EVT VT = EVT::getEVT(getTypeForLLT(DataTy, Ctx));
931 
932   if (TLI->allowsMemoryAccessForAlignment(Ctx, DL, VT, *MMO))
933     return true;
934 
935   unsigned EltSizeBits = DataTy.getScalarSizeInBits();
936   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
937          "Unexpected unaligned RVV load type");
938 
939   // Calculate the new vector type with i8 elements
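  // (e.g. an unaligned nxv2s32 access is rewritten as an nxv8s8 access).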
940   unsigned NumElements =
941       DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
942   LLT NewDataTy = LLT::scalable_vector(NumElements, 8);
943 
944   Helper.bitcast(MI, 0, NewDataTy);
945 
946   return true;
947 }
948 
949 /// Return the mask type suitable for masking the provided vector type.
950 /// This is simply an i1-element vector of the same (possibly scalable)
951 /// length.
952 static LLT getMaskTypeFor(LLT VecTy) {
953   assert(VecTy.isVector());
954   ElementCount EC = VecTy.getElementCount();
955   return LLT::vector(EC, LLT::scalar(1));
956 }
957 
958 /// Creates an all ones mask suitable for masking a vector of type VecTy with
959 /// vector length VL.
960 static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
961                                             MachineIRBuilder &MIB,
962                                             MachineRegisterInfo &MRI) {
963   LLT MaskTy = getMaskTypeFor(VecTy);
964   return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
965 }
966 
967 /// Gets the two common "VL" operands: an all-ones mask and the vector length.
968 /// VecTy is a scalable vector type.
969 static std::pair<MachineInstrBuilder, MachineInstrBuilder>
970 buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
971   assert(VecTy.isScalableVector() && "Expecting scalable container type");
972   const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
973   LLT XLenTy(STI.getXLenVT());
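  // An all-ones (-1) vector length is the sentinel value meaning VLMAX.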
974   auto VL = MIB.buildConstant(XLenTy, -1);
975   auto Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
976   return {Mask, VL};
977 }
978 
979 static MachineInstrBuilder
980 buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
981                          Register Hi, const SrcOp &VL, MachineIRBuilder &MIB,
982                          MachineRegisterInfo &MRI) {
983   // TODO: If the Hi bits of the splat are undefined, then it's fine to just
984   // splat Lo even if it might be sign extended. I don't think we have
985   // introduced a case where we build an s64 whose upper bits are undef
986   // yet.
987 
988   // Fall back to a stack store and stride x0 vector load.
989   // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
990   // preprocessDAG in SDAG.
991   return MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst},
992                         {Passthru, Lo, Hi, VL});
993 }
994 
995 static MachineInstrBuilder
996 buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
997                          const SrcOp &Scalar, const SrcOp &VL,
998                          MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
999   assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
1000   auto Unmerge = MIB.buildUnmerge(LLT::scalar(32), Scalar);
1001   return buildSplatPartsS64WithVL(Dst, Passthru, Unmerge.getReg(0),
1002                                   Unmerge.getReg(1), VL, MIB, MRI);
1003 }
1004 
1005 // Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
1006 // legal equivalently-sized i8 type, so we can use that as a go-between.
1007 // Splats of s1 types that have constant value can be legalized as VMSET_VL or
1008 // VMCLR_VL.
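// For a non-constant scalar x this roughly becomes:
//   (vXs1 = G_SPLAT_VECTOR s1:x)
//     -> (vXs1 = G_ICMP ne (vXs8 splat (zext(x) & 1)), (vXs8 splat 0))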
1009 bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
1010                                              MachineIRBuilder &MIB) const {
1011   assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);
1012 
1013   MachineRegisterInfo &MRI = *MIB.getMRI();
1014 
1015   Register Dst = MI.getOperand(0).getReg();
1016   Register SplatVal = MI.getOperand(1).getReg();
1017 
1018   LLT VecTy = MRI.getType(Dst);
1019   LLT XLenTy(STI.getXLenVT());
1020 
1021   // Handle case of s64 element vectors on rv32
1022   if (XLenTy.getSizeInBits() == 32 &&
1023       VecTy.getElementType().getSizeInBits() == 64) {
1024     auto [_, VL] = buildDefaultVLOps(MRI.getType(Dst), MIB, MRI);
1025     buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
1026                              MRI);
1027     MI.eraseFromParent();
1028     return true;
1029   }
1030 
1031   // All-zeros or all-ones splats are handled specially.
1032   MachineInstr &SplatValMI = *MRI.getVRegDef(SplatVal);
1033   if (isAllOnesOrAllOnesSplat(SplatValMI, MRI)) {
1034     auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1035     MIB.buildInstr(RISCV::G_VMSET_VL, {Dst}, {VL});
1036     MI.eraseFromParent();
1037     return true;
1038   }
1039   if (isNullOrNullSplat(SplatValMI, MRI)) {
1040     auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1041     MIB.buildInstr(RISCV::G_VMCLR_VL, {Dst}, {VL});
1042     MI.eraseFromParent();
1043     return true;
1044   }
1045 
1046   // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
1047   // ones) by promoting it to an s8 splat.
1048   LLT InterEltTy = LLT::scalar(8);
1049   LLT InterTy = VecTy.changeElementType(InterEltTy);
1050   auto ZExtSplatVal = MIB.buildZExt(InterEltTy, SplatVal);
1051   auto And =
1052       MIB.buildAnd(InterEltTy, ZExtSplatVal, MIB.buildConstant(InterEltTy, 1));
1053   auto LHS = MIB.buildSplatVector(InterTy, And);
1054   auto ZeroSplat =
1055       MIB.buildSplatVector(InterTy, MIB.buildConstant(InterEltTy, 0));
1056   MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, LHS, ZeroSplat);
1057   MI.eraseFromParent();
1058   return true;
1059 }
1060 
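// Returns the type that fills exactly one vector register (LMUL = 1) with
// VecTy's element type, e.g. nxv2s32 or nxv1s64 when RVVBitsPerBlock is 64.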
1061 static LLT getLMUL1Ty(LLT VecTy) {
1062   assert(VecTy.getElementType().getSizeInBits() <= 64 &&
1063          "Unexpected vector LLT");
1064   return LLT::scalable_vector(RISCV::RVVBitsPerBlock /
1065                                   VecTy.getElementType().getSizeInBits(),
1066                               VecTy.getElementType());
1067 }
1068 
1069 bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
1070                                                   MachineIRBuilder &MIB) const {
1071   GExtractSubvector &ES = cast<GExtractSubvector>(MI);
1072 
1073   MachineRegisterInfo &MRI = *MIB.getMRI();
1074 
1075   Register Dst = ES.getReg(0);
1076   Register Src = ES.getSrcVec();
1077   uint64_t Idx = ES.getIndexImm();
1078 
1079   // With an index of 0 this is a cast-like subvector, which can be performed
1080   // with subregister operations.
1081   if (Idx == 0)
1082     return true;
1083 
1084   LLT LitTy = MRI.getType(Dst);
1085   LLT BigTy = MRI.getType(Src);
1086 
1087   if (LitTy.getElementType() == LLT::scalar(1)) {
1088     // We can't slide this mask vector down indexed by its i1 elements.
1089     // This poses a problem when we wish to extract a scalable vector which
1090     // can't be re-expressed as a larger type. Just choose the slow path and
1091     // extend to a larger type, then truncate back down.
1092     LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
1093     LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
1094     auto BigZExt = MIB.buildZExt(ExtBigTy, Src);
1095     auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
1096     auto SplatZero = MIB.buildSplatVector(
1097         ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
1098     MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
1099     MI.eraseFromParent();
1100     return true;
1101   }
1102 
1103   // extract_subvector scales the index by vscale if the subvector is scalable,
1104   // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
1105   const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1106   MVT LitTyMVT = getMVTForLLT(LitTy);
1107   auto Decompose =
1108       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1109           getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
1110   unsigned RemIdx = Decompose.second;
1111 
1112   // If the Idx has been completely eliminated then this is a subvector extract
1113   // which naturally aligns to a vector register. These can easily be handled
1114   // using subregister manipulation.
1115   if (RemIdx == 0)
1116     return true;
1117 
1118   // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
1119   // was > M1 then the index would need to be a multiple of VLMAX, and so would
1120   // divide exactly.
1121   assert(
1122       RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(LitTyMVT)).second ||
1123       RISCVTargetLowering::getLMUL(LitTyMVT) == RISCVII::VLMUL::LMUL_1);
1124 
1125   // If the vector type is an LMUL-group type, extract a subvector equal to the
1126   // nearest full vector register type.
1127   LLT InterLitTy = BigTy;
1128   Register Vec = Src;
1129   if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
1130                           getLMUL1Ty(BigTy).getSizeInBits())) {
1131     // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
1132     // we should have successfully decomposed the extract into a subregister.
1133     assert(Decompose.first != RISCV::NoSubRegister);
1134     InterLitTy = getLMUL1Ty(BigTy);
1135     // SDAG builds a TargetExtractSubreg. We cannot create a Copy with a SubReg
1136     // specified on the source Register (the equivalent) since generic virtual
1137     // registers do not allow subregister indices.
1138     Vec = MIB.buildExtractSubvector(InterLitTy, Src, Idx - RemIdx).getReg(0);
1139   }
1140 
1141   // Slide this vector register down by the desired number of elements in order
1142   // to place the desired subvector starting at element 0.
1143   const LLT XLenTy(STI.getXLenVT());
1144   auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx);
1145   auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
1146   uint64_t Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
1147   auto Slidedown = MIB.buildInstr(
1148       RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
1149       {MIB.buildUndef(InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
1150 
1151   // Now the vector is in the right position, extract our final subvector. This
1152   // should resolve to a COPY.
1153   MIB.buildExtractSubvector(Dst, Slidedown, 0);
1154 
1155   MI.eraseFromParent();
1156   return true;
1157 }
1158 
1159 bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
1160                                                  LegalizerHelper &Helper,
1161                                                  MachineIRBuilder &MIB) const {
1162   GInsertSubvector &IS = cast<GInsertSubvector>(MI);
1163 
1164   MachineRegisterInfo &MRI = *MIB.getMRI();
1165 
1166   Register Dst = IS.getReg(0);
1167   Register BigVec = IS.getBigVec();
1168   Register LitVec = IS.getSubVec();
1169   uint64_t Idx = IS.getIndexImm();
1170 
1171   LLT BigTy = MRI.getType(BigVec);
1172   LLT LitTy = MRI.getType(LitVec);
1173 
1174   if (Idx == 0 ||
1175       MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
1176     return true;
1177 
1178   // We don't have the ability to slide mask vectors up indexed by their i1
1179   // elements; the smallest we can do is i8. Often we are able to bitcast to
1180   // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
1181   // vectors and truncate down after the insert.
1182   if (LitTy.getElementType() == LLT::scalar(1)) {
1183     auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
1184     auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
1185     if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
1186       return Helper.bitcast(
1187           IS, 0,
1188           LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8));
1189 
1190     // We can't slide this mask vector up indexed by its i1 elements.
1191     // This poses a problem when we wish to insert a scalable vector which
1192     // can't be re-expressed as a larger type. Just choose the slow path and
1193     // extend to a larger type, then truncate back down.
1194     LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
1195     return Helper.widenScalar(IS, 0, ExtBigTy);
1196   }
1197 
1198   const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1199   unsigned SubRegIdx, RemIdx;
1200   std::tie(SubRegIdx, RemIdx) =
1201       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1202           getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);
1203 
1204   TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
1205   assert(isPowerOf2_64(
1206       STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
1207   bool ExactlyVecRegSized =
1208       STI.expandVScale(LitTy.getSizeInBits())
1209           .isKnownMultipleOf(STI.expandVScale(VecRegSize));
1210 
1211   // If the Idx has been completely eliminated and this subvector's size is a
1212   // vector register or a multiple thereof, or the surrounding elements are
1213   // undef, then this is a subvector insert which naturally aligns to a vector
1214   // register. These can easily be handled using subregister manipulation.
1215   if (RemIdx == 0 && ExactlyVecRegSized)
1216     return true;
1217 
1218   // If the subvector is smaller than a vector register, then the insertion
1219   // must preserve the undisturbed elements of the register. We do this by
1220   // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
1221   // (which resolves to a subregister copy), performing a VSLIDEUP to place the
1222   // subvector within the vector register, and an INSERT_SUBVECTOR of that
1223   // LMUL=1 type back into the larger vector (resolving to another subregister
1224   // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
1225   // to avoid allocating a large register group to hold our subvector.
1226 
1227   // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
1228   // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
1229   // (in our case undisturbed). This means we can set up a subvector insertion
1230   // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
1231   // size of the subvector.
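  // For example, inserting a subvector of (scalable) length L at element
  // offset O uses a slide amount of O and VL = O + L, so with a
  // tail-undisturbed policy the elements below O and at or above O + L are
  // preserved.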
1232   const LLT XLenTy(STI.getXLenVT());
1233   LLT InterLitTy = BigTy;
1234   Register AlignedExtract = BigVec;
1235   unsigned AlignedIdx = Idx - RemIdx;
1236   if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
1237                           getLMUL1Ty(BigTy).getSizeInBits())) {
1238     InterLitTy = getLMUL1Ty(BigTy);
1239     // Extract a subvector equal to the nearest full vector register type. This
1240     // should resolve to a G_EXTRACT on a subreg.
1241     AlignedExtract =
1242         MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
1243   }
1244 
1245   auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
1246                                          LitVec, 0);
1247 
1248   auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
1249   auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());
1250 
1251   // If we're inserting into the lowest elements, use a tail undisturbed
1252   // vmv.v.v.
1253   MachineInstrBuilder Inserted;
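  // If we narrowed to an LMUL=1 type above, build the slide/vmv result into a
  // temporary register and insert it back into BigVec afterwards; otherwise
  // write directly to Dst.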
1254   bool NeedInsertSubvec =
1255       TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits());
1256   Register InsertedDst =
1257       NeedInsertSubvec ? MRI.createGenericVirtualRegister(InterLitTy) : Dst;
1258   if (RemIdx == 0) {
1259     Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InsertedDst},
1260                               {AlignedExtract, Insert, VL});
1261   } else {
1262     auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
1263     // Construct the vector length corresponding to RemIdx + length(LitTy).
1264     VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
1265     // Use tail agnostic policy if we're inserting over InterLitTy's tail.
1266     ElementCount EndIndex =
1267         ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
1268     uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
1269     if (STI.expandVScale(EndIndex) ==
1270         STI.expandVScale(InterLitTy.getElementCount()))
1271       Policy = RISCVII::TAIL_AGNOSTIC;
1272 
1273     Inserted =
1274         MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InsertedDst},
1275                        {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
1276   }
1277 
1278   // If required, insert this subvector back into the correct vector register.
1279   // This should resolve to an INSERT_SUBREG instruction.
1280   if (NeedInsertSubvec)
1281     MIB.buildInsertSubvector(Dst, BigVec, Inserted, AlignedIdx);
1282 
1283   MI.eraseFromParent();
1284   return true;
1285 }
1286 
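// Return the RISC-V "W" generic opcode corresponding to \p Opcode. W-form
// operations produce a 32-bit result that is sign-extended to XLEN; the
// integer forms read only the low 32 bits of their operands.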
1287 static unsigned getRISCVWOpcode(unsigned Opcode) {
1288   switch (Opcode) {
1289   default:
1290     llvm_unreachable("Unexpected opcode");
1291   case TargetOpcode::G_ASHR:
1292     return RISCV::G_SRAW;
1293   case TargetOpcode::G_LSHR:
1294     return RISCV::G_SRLW;
1295   case TargetOpcode::G_SHL:
1296     return RISCV::G_SLLW;
1297   case TargetOpcode::G_SDIV:
1298     return RISCV::G_DIVW;
1299   case TargetOpcode::G_UDIV:
1300     return RISCV::G_DIVUW;
1301   case TargetOpcode::G_UREM:
1302     return RISCV::G_REMUW;
1303   case TargetOpcode::G_ROTL:
1304     return RISCV::G_ROLW;
1305   case TargetOpcode::G_ROTR:
1306     return RISCV::G_RORW;
1307   case TargetOpcode::G_CTLZ:
1308     return RISCV::G_CLZW;
1309   case TargetOpcode::G_CTTZ:
1310     return RISCV::G_CTZW;
1311   case TargetOpcode::G_FPTOSI:
1312     return RISCV::G_FCVT_W_RV64;
1313   case TargetOpcode::G_FPTOUI:
1314     return RISCV::G_FCVT_WU_RV64;
1315   }
1316 }
1317 
1318 bool RISCVLegalizerInfo::legalizeCustom(
1319     LegalizerHelper &Helper, MachineInstr &MI,
1320     LostDebugLocObserver &LocObserver) const {
1321   MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1322   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1323   MachineFunction &MF = *MI.getParent()->getParent();
1324   switch (MI.getOpcode()) {
1325   default:
1326     // No idea what to do.
1327     return false;
1328   case TargetOpcode::G_ABS:
1329     return Helper.lowerAbsToMaxNeg(MI);
1330   // TODO: G_FCONSTANT
1331   case TargetOpcode::G_CONSTANT: {
1332     const Function &F = MF.getFunction();
1333     // TODO: if PSI and BFI are present, add " ||
1334     // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
1335     bool ShouldOptForSize = F.hasOptSize() || F.hasMinSize();
1336     const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
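    // Constants that shouldBeInConstantPool deems expensive to materialize are
    // lowered by the helper; others are kept as a plain G_CONSTANT.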
1337     if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
1338       return true;
1339     return Helper.lowerConstant(MI);
1340   }
1341   case TargetOpcode::G_SUB:
1342   case TargetOpcode::G_ADD: {
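    // Perform the operation on XLEN-wide values (any-extension is fine since
    // only the low 32 bits affect the narrow result), then rebuild the narrow
    // result as a truncated sext_inreg of the XLEN result, mirroring the
    // sign-extending addw/subw instructions.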
1343     Helper.Observer.changingInstr(MI);
1344     Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1345     Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1346 
1347     Register DstALU = MRI.createGenericVirtualRegister(sXLen);
1348 
1349     MachineOperand &MO = MI.getOperand(0);
1350     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1351     auto DstSext = MIRBuilder.buildSExtInReg(sXLen, DstALU, 32);
1352 
1353     MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {MO}, {DstSext});
1354     MO.setReg(DstALU);
1355 
1356     Helper.Observer.changedInstr(MI);
1357     return true;
1358   }
1359   case TargetOpcode::G_SEXT_INREG: {
1360     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1361     int64_t SizeInBits = MI.getOperand(2).getImm();
1362     // Sign-extending the low 32 bits of a 64-bit value is a single sext.w,
1363     // so it is legal as-is.
1363     if (DstTy.getSizeInBits() == 64 && SizeInBits == 32)
1364       return true;
1365 
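    // With Zbb, 8- and 16-bit sign extensions are single sext.b/sext.h
    // instructions, so they are already legal.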
1366     if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16))
1367       return true;
1368 
1369     return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
1370            LegalizerHelper::Legalized;
1371   }
1372   case TargetOpcode::G_ASHR:
1373   case TargetOpcode::G_LSHR:
1374   case TargetOpcode::G_SHL: {
1375     if (getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
1376       // We don't need a custom node for shift by constant. Just widen the
1377       // source and the shift amount.
1378       unsigned ExtOpc = TargetOpcode::G_ANYEXT;
1379       if (MI.getOpcode() == TargetOpcode::G_ASHR)
1380         ExtOpc = TargetOpcode::G_SEXT;
1381       else if (MI.getOpcode() == TargetOpcode::G_LSHR)
1382         ExtOpc = TargetOpcode::G_ZEXT;
1383 
1384       Helper.Observer.changingInstr(MI);
1385       Helper.widenScalarSrc(MI, sXLen, 1, ExtOpc);
1386       Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ZEXT);
1387       Helper.widenScalarDst(MI, sXLen);
1388       Helper.Observer.changedInstr(MI);
1389       return true;
1390     }
1391 
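    // Variable shift amount: the W-form shifts read only the low 32 bits of the
    // value and the low five bits of the shift amount, so any-extending both
    // operands is sufficient.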
1392     Helper.Observer.changingInstr(MI);
1393     Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1394     Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1395     Helper.widenScalarDst(MI, sXLen);
1396     MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1397     Helper.Observer.changedInstr(MI);
1398     return true;
1399   }
1400   case TargetOpcode::G_SDIV:
1401   case TargetOpcode::G_UDIV:
1402   case TargetOpcode::G_UREM:
1403   case TargetOpcode::G_ROTL:
1404   case TargetOpcode::G_ROTR: {
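    // These all have W-form counterparts (G_DIVW, G_DIVUW, G_REMUW, G_ROLW,
    // G_RORW): widen the operands and result to XLEN and switch opcodes.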
1405     Helper.Observer.changingInstr(MI);
1406     Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1407     Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1408     Helper.widenScalarDst(MI, sXLen);
1409     MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1410     Helper.Observer.changedInstr(MI);
1411     return true;
1412   }
1413   case TargetOpcode::G_CTLZ:
1414   case TargetOpcode::G_CTTZ: {
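    // G_CLZW/G_CTZW count within the low 32 bits of an XLEN register; widen the
    // source and destination and switch to the W-form opcode.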
1415     Helper.Observer.changingInstr(MI);
1416     Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1417     Helper.widenScalarDst(MI, sXLen);
1418     MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1419     Helper.Observer.changedInstr(MI);
1420     return true;
1421   }
1422   case TargetOpcode::G_FPTOSI:
1423   case TargetOpcode::G_FPTOUI: {
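    // On RV64, fcvt.w/fcvt.wu produce a sign-extended 32-bit result; widen the
    // destination to XLEN, switch to the W-form conversion, and request
    // round-towards-zero explicitly.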
1424     Helper.Observer.changingInstr(MI);
1425     Helper.widenScalarDst(MI, sXLen);
1426     MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1427     MI.addOperand(MachineOperand::CreateImm(RISCVFPRndMode::RTZ));
1428     Helper.Observer.changedInstr(MI);
1429     return true;
1430   }
1431   case TargetOpcode::G_IS_FPCLASS: {
1432     Register GISFPCLASS = MI.getOperand(0).getReg();
1433     Register Src = MI.getOperand(1).getReg();
1434     const MachineOperand &ImmOp = MI.getOperand(2);
1435     MachineIRBuilder MIB(MI);
1436 
1437     // Map LLVM IR's floating-point class test mask to RISC-V's fclass bit
1438     // layout by simply rotating the 10-bit immediate right by two bits.
1439     APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
1440     auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
1441     auto ConstZero = MIB.buildConstant(sXLen, 0);
1442 
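    // fclass sets exactly one class bit, so the test passes iff that bit is
    // also set in the rotated mask, i.e. the AND is non-zero.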
1443     auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
1444     auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
1445     MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);
1446 
1447     MI.eraseFromParent();
1448     return true;
1449   }
1450   case TargetOpcode::G_BRJT:
1451     return legalizeBRJT(MI, MIRBuilder);
1452   case TargetOpcode::G_VASTART:
1453     return legalizeVAStart(MI, MIRBuilder);
1454   case TargetOpcode::G_VSCALE:
1455     return legalizeVScale(MI, MIRBuilder);
1456   case TargetOpcode::G_ZEXT:
1457   case TargetOpcode::G_SEXT:
1458   case TargetOpcode::G_ANYEXT:
1459     return legalizeExt(MI, MIRBuilder);
1460   case TargetOpcode::G_SPLAT_VECTOR:
1461     return legalizeSplatVector(MI, MIRBuilder);
1462   case TargetOpcode::G_EXTRACT_SUBVECTOR:
1463     return legalizeExtractSubvector(MI, MIRBuilder);
1464   case TargetOpcode::G_INSERT_SUBVECTOR:
1465     return legalizeInsertSubvector(MI, Helper, MIRBuilder);
1466   case TargetOpcode::G_LOAD:
1467   case TargetOpcode::G_STORE:
1468     return legalizeLoadStore(MI, Helper, MIRBuilder);
1469   }
1470 
1471   llvm_unreachable("expected switch to return");
1472 }
1473