//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for RISC-V.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "RISCVLegalizerInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;
using namespace LegalityPredicates;
using namespace LegalizeMutations;

static LegalityPredicate
typeIsLegalIntOrFPVec(unsigned TypeIdx,
                      std::initializer_list<LLT> IntOrFPVecTys,
                      const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
            ST.hasVInstructionsI64()) &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };

  return all(typeInSet(TypeIdx, IntOrFPVecTys), P);
}

static LegalityPredicate
typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
                   const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };
  return all(typeInSet(TypeIdx, BoolVecTys), P);
}

static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx,
                                           std::initializer_list<LLT> PtrVecTys,
                                           const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64) &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 16 ||
            Query.Types[TypeIdx].getScalarSizeInBits() == 32);
  };
  return all(typeInSet(TypeIdx, PtrVecTys), P);
}

RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
    : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
  const LLT sDoubleXLen = LLT::scalar(2 * XLen);
  const LLT p0 = LLT::pointer(0, XLen);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);

  const LLT nxv1s1 = LLT::scalable_vector(1, s1);
  const LLT nxv2s1 = LLT::scalable_vector(2, s1);
  const LLT nxv4s1 = LLT::scalable_vector(4, s1);
  const LLT nxv8s1 = LLT::scalable_vector(8, s1);
  const LLT nxv16s1 = LLT::scalable_vector(16, s1);
  const LLT nxv32s1 = LLT::scalable_vector(32, s1);
  const LLT nxv64s1 = LLT::scalable_vector(64, s1);

  const LLT nxv1s8 = LLT::scalable_vector(1, s8);
  const LLT nxv2s8 = LLT::scalable_vector(2, s8);
  const LLT nxv4s8 = LLT::scalable_vector(4, s8);
  const LLT nxv8s8 = LLT::scalable_vector(8, s8);
  const LLT nxv16s8 = LLT::scalable_vector(16, s8);
  const LLT nxv32s8 = LLT::scalable_vector(32, s8);
  const LLT nxv64s8 = LLT::scalable_vector(64, s8);

  const LLT nxv1s16 = LLT::scalable_vector(1, s16);
  const LLT nxv2s16 = LLT::scalable_vector(2, s16);
  const LLT nxv4s16 = LLT::scalable_vector(4, s16);
  const LLT nxv8s16 = LLT::scalable_vector(8, s16);
  const LLT nxv16s16 = LLT::scalable_vector(16, s16);
  const LLT nxv32s16 = LLT::scalable_vector(32, s16);

  const LLT nxv1s32 = LLT::scalable_vector(1, s32);
  const LLT nxv2s32 = LLT::scalable_vector(2, s32);
  const LLT nxv4s32 = LLT::scalable_vector(4, s32);
  const LLT nxv8s32 = LLT::scalable_vector(8, s32);
  const LLT nxv16s32 = LLT::scalable_vector(16, s32);

  const LLT nxv1s64 = LLT::scalable_vector(1, s64);
  const LLT nxv2s64 = LLT::scalable_vector(2, s64);
  const LLT nxv4s64 = LLT::scalable_vector(4, s64);
  const LLT nxv8s64 = LLT::scalable_vector(8, s64);

  const LLT nxv1p0 = LLT::scalable_vector(1, p0);
  const LLT nxv2p0 = LLT::scalable_vector(2, p0);
  const LLT nxv4p0 = LLT::scalable_vector(4, p0);
  const LLT nxv8p0 = LLT::scalable_vector(8, p0);
  const LLT nxv16p0 = LLT::scalable_vector(16, p0);

  using namespace TargetOpcode;

  auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};

  auto IntOrFPVecTys = {nxv1s8,   nxv2s8,  nxv4s8,  nxv8s8,  nxv16s8, nxv32s8,
                        nxv64s8,  nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
                        nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
                        nxv1s64,  nxv2s64, nxv4s64, nxv8s64};

  auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};

  getActionDefinitionsBuilder({G_ADD, G_SUB})
      .legalFor({sXLen})
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .customFor(ST.is64Bit(), {s32})
      .widenScalarToNextPow2(0)
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
      .legalFor({sXLen})
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .widenScalarToNextPow2(0)
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder(
      {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();

  getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();

  // TODO: Use Vector Single-Width Saturating Instructions for vector types.
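  // (i.e. vsaddu/vsadd/vssubu/vssub from the V extension.)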
  getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
      .lower();

  getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
      .legalFor({{sXLen, sXLen}})
      .customFor(ST.is64Bit(), {{s32, s32}})
      .widenScalarToNextPow2(0)
      .clampScalar(1, sXLen, sXLen)
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalFor({{s32, s16}})
      .legalFor(ST.is64Bit(), {{s64, s16}, {s64, s32}})
      .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                   typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
      .customIf(typeIsLegalBoolVec(1, BoolVecTys, ST))
      .maxScalar(0, sXLen);

  getActionDefinitionsBuilder(G_SEXT_INREG)
      .customFor({sXLen})
      .clampScalar(0, sXLen, sXLen)
      .lower();

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    if (XLen == 32 && ST.hasStdExtD()) {
      MergeUnmergeActions.legalIf(
          all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32)));
    }
    MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
        .widenScalarToNextPow2(BigTyIdx, XLen)
        .clampScalar(LitTyIdx, sXLen, sXLen)
        .clampScalar(BigTyIdx, sXLen, sXLen);
  }

  getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();

  getActionDefinitionsBuilder({G_ROTR, G_ROTL})
      .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), {{sXLen, sXLen}})
      .customFor(ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()),
                 {{s32, s32}})
      .lower();

  getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();

  getActionDefinitionsBuilder(G_BITCAST).legalIf(
      all(LegalityPredicates::any(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                                  typeIsLegalBoolVec(0, BoolVecTys, ST)),
          LegalityPredicates::any(typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST),
                                  typeIsLegalBoolVec(1, BoolVecTys, ST))));

  auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
  if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
    BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
  else
    BSWAPActions.maxScalar(0, sXLen).lower();

  auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
  auto &CountZerosUndefActions =
      getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
  if (ST.hasStdExtZbb()) {
    CountZerosActions.legalFor({{sXLen, sXLen}})
        .customFor({{s32, s32}})
        .clampScalar(0, s32, sXLen)
        .widenScalarToNextPow2(0)
        .scalarSameSizeAs(1, 0);
  } else {
    CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
    CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
  }
  CountZerosUndefActions.lower();

  auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
  if (ST.hasStdExtZbb()) {
    CTPOPActions.legalFor({{sXLen, sXLen}})
        .clampScalar(0, sXLen, sXLen)
        .scalarSameSizeAs(1, 0);
  } else {
    CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
  }

  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0})
      .legalFor(!ST.is64Bit(), {s32})
      .customFor(ST.is64Bit(), {s64})
      .widenScalarToNextPow2(0)
      .clampScalar(0, sXLen, sXLen);

  // TODO: Transform illegal vector types into legal vector types.
  getActionDefinitionsBuilder(G_FREEZE)
      .legalFor({s16, s32, p0})
      .legalFor(ST.is64Bit(), {s64})
      .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s16, sXLen);

  // TODO: Transform illegal vector types into legal vector types.
  // TODO: Merge with G_FREEZE?
  getActionDefinitionsBuilder(
      {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
      .legalFor({s32, sXLen, p0})
      .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{sXLen, sXLen}, {sXLen, p0}})
      .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
                   typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
      .widenScalarOrEltToNextPow2OrMinSize(1, 8)
      .clampScalar(1, sXLen, sXLen)
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, sXLen}, {p0, sXLen}})
      .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                   typeIsLegalBoolVec(1, BoolVecTys, ST)))
      .legalFor(XLen == 64 || ST.hasStdExtD(), {{s64, sXLen}})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
      .clampScalar(1, sXLen, sXLen);

  auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
  auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
  auto &ExtLoadActions = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD});

  // Return the alignment needed for scalar memory ops. If unaligned scalar mem
  // is supported, we only require byte alignment. Otherwise, we need the
  // memory op to be natively aligned.
  auto getScalarMemAlign = [&ST](unsigned Size) {
    return ST.enableUnalignedScalarMem() ? 8 : Size;
  };

  LoadActions.legalForTypesWithMemDesc(
      {{s16, p0, s8, getScalarMemAlign(8)},
       {s32, p0, s8, getScalarMemAlign(8)},
       {s16, p0, s16, getScalarMemAlign(16)},
       {s32, p0, s16, getScalarMemAlign(16)},
       {s32, p0, s32, getScalarMemAlign(32)},
       {p0, p0, sXLen, getScalarMemAlign(XLen)}});
  StoreActions.legalForTypesWithMemDesc(
      {{s16, p0, s8, getScalarMemAlign(8)},
       {s32, p0, s8, getScalarMemAlign(8)},
       {s16, p0, s16, getScalarMemAlign(16)},
       {s32, p0, s16, getScalarMemAlign(16)},
       {s32, p0, s32, getScalarMemAlign(32)},
       {p0, p0, sXLen, getScalarMemAlign(XLen)}});
  ExtLoadActions.legalForTypesWithMemDesc(
      {{sXLen, p0, s8, getScalarMemAlign(8)},
       {sXLen, p0, s16, getScalarMemAlign(16)}});
  if (XLen == 64) {
    LoadActions.legalForTypesWithMemDesc(
        {{s64, p0, s8, getScalarMemAlign(8)},
         {s64, p0, s16, getScalarMemAlign(16)},
         {s64, p0, s32, getScalarMemAlign(32)},
         {s64, p0, s64, getScalarMemAlign(64)}});
    StoreActions.legalForTypesWithMemDesc(
        {{s64, p0, s8, getScalarMemAlign(8)},
         {s64, p0, s16, getScalarMemAlign(16)},
         {s64, p0, s32, getScalarMemAlign(32)},
         {s64, p0, s64, getScalarMemAlign(64)}});
    ExtLoadActions.legalForTypesWithMemDesc(
        {{s64, p0, s32, getScalarMemAlign(32)}});
  } else if (ST.hasStdExtD()) {
    LoadActions.legalForTypesWithMemDesc(
        {{s64, p0, s64, getScalarMemAlign(64)}});
    StoreActions.legalForTypesWithMemDesc(
        {{s64, p0, s64, getScalarMemAlign(64)}});
  }

  // Vector loads/stores.
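  // Roughly: RVV loads/stores below are declared legal with element alignment;
  // anything weaker falls through to the custom lowering (legalizeLoadStore),
  // which bitcasts to an equivalent i8 vector when the access is not allowed
  // as-is.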
  if (ST.hasVInstructions()) {
    LoadActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
                                          {nxv4s8, p0, nxv4s8, 8},
                                          {nxv8s8, p0, nxv8s8, 8},
                                          {nxv16s8, p0, nxv16s8, 8},
                                          {nxv32s8, p0, nxv32s8, 8},
                                          {nxv64s8, p0, nxv64s8, 8},
                                          {nxv2s16, p0, nxv2s16, 16},
                                          {nxv4s16, p0, nxv4s16, 16},
                                          {nxv8s16, p0, nxv8s16, 16},
                                          {nxv16s16, p0, nxv16s16, 16},
                                          {nxv32s16, p0, nxv32s16, 16},
                                          {nxv2s32, p0, nxv2s32, 32},
                                          {nxv4s32, p0, nxv4s32, 32},
                                          {nxv8s32, p0, nxv8s32, 32},
                                          {nxv16s32, p0, nxv16s32, 32}});
    StoreActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
                                           {nxv4s8, p0, nxv4s8, 8},
                                           {nxv8s8, p0, nxv8s8, 8},
                                           {nxv16s8, p0, nxv16s8, 8},
                                           {nxv32s8, p0, nxv32s8, 8},
                                           {nxv64s8, p0, nxv64s8, 8},
                                           {nxv2s16, p0, nxv2s16, 16},
                                           {nxv4s16, p0, nxv4s16, 16},
                                           {nxv8s16, p0, nxv8s16, 16},
                                           {nxv16s16, p0, nxv16s16, 16},
                                           {nxv32s16, p0, nxv32s16, 16},
                                           {nxv2s32, p0, nxv2s32, 32},
                                           {nxv4s32, p0, nxv4s32, 32},
                                           {nxv8s32, p0, nxv8s32, 32},
                                           {nxv16s32, p0, nxv16s32, 32}});

    if (ST.getELen() == 64) {
      LoadActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
                                            {nxv1s16, p0, nxv1s16, 16},
                                            {nxv1s32, p0, nxv1s32, 32}});
      StoreActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
                                             {nxv1s16, p0, nxv1s16, 16},
                                             {nxv1s32, p0, nxv1s32, 32}});
    }

    if (ST.hasVInstructionsI64()) {
      LoadActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
                                            {nxv2s64, p0, nxv2s64, 64},
                                            {nxv4s64, p0, nxv4s64, 64},
                                            {nxv8s64, p0, nxv8s64, 64}});
      StoreActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
                                             {nxv2s64, p0, nxv2s64, 64},
                                             {nxv4s64, p0, nxv4s64, 64},
                                             {nxv8s64, p0, nxv8s64, 64}});
    }

    // We take the custom lowering path for scalable vector types with
    // non-standard alignments.
    LoadActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
    StoreActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));

    // Pointers require that XLen sized elements are legal.
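    // (e.g. on RV64 pointer-element vectors need ELEN == 64, whereas on RV32
    // any V configuration, with ELEN >= 32, is sufficient.)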
    if (XLen <= ST.getELen()) {
      LoadActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
      StoreActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
    }
  }

  LoadActions.widenScalarToNextPow2(0, /* MinSize = */ 8)
      .lowerIfMemSizeNotByteSizePow2()
      .clampScalar(0, s16, sXLen)
      .lower();
  StoreActions
      .clampScalar(0, s16, sXLen)
      .lowerIfMemSizeNotByteSizePow2()
      .lower();

  ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, sXLen, sXLen).lower();

  getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalFor({{sXLen, p0}})
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .legalFor({{p0, sXLen}})
      .clampScalar(1, sXLen, sXLen);

  getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);

  getActionDefinitionsBuilder(G_BRJT).customFor({{p0, sXLen}});

  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s32, sXLen})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen);

  getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
      .legalFor({p0});

  if (ST.hasStdExtZmmul()) {
    getActionDefinitionsBuilder(G_MUL)
        .legalFor({sXLen})
        .widenScalarToNextPow2(0)
        .clampScalar(0, sXLen, sXLen);

    // clang-format off
    getActionDefinitionsBuilder({G_SMULH, G_UMULH})
        .legalFor({sXLen})
        .lower();
    // clang-format on

    getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
  } else {
    getActionDefinitionsBuilder(G_MUL)
        .libcallFor({sXLen, sDoubleXLen})
        .widenScalarToNextPow2(0)
        .clampScalar(0, sXLen, sDoubleXLen);

    getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});

    getActionDefinitionsBuilder({G_SMULO, G_UMULO})
        .minScalar(0, sXLen)
        // Widen sXLen to sDoubleXLen so we can use a single libcall to get
        // the low bits for the mul result and high bits to do the overflow
        // check.
        .widenScalarIf(typeIs(0, sXLen),
                       LegalizeMutations::changeTo(0, sDoubleXLen))
        .lower();
  }

  if (ST.hasStdExtM()) {
    getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_UREM})
        .legalFor({sXLen})
        .customFor({s32})
        .libcallFor({sDoubleXLen})
        .clampScalar(0, s32, sDoubleXLen)
        .widenScalarToNextPow2(0);
    getActionDefinitionsBuilder(G_SREM)
        .legalFor({sXLen})
        .libcallFor({sDoubleXLen})
        .clampScalar(0, sXLen, sDoubleXLen)
        .widenScalarToNextPow2(0);
  } else {
    getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
        .libcallFor({sXLen, sDoubleXLen})
        .clampScalar(0, sXLen, sDoubleXLen)
        .widenScalarToNextPow2(0);
  }

  // TODO: Use libcall for sDoubleXLen.
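  // For now, G_SDIVREM and G_UDIVREM are always split into separate division
  // and remainder operations.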
  getActionDefinitionsBuilder({G_SDIVREM, G_UDIVREM}).lower();

  getActionDefinitionsBuilder(G_ABS)
      .customFor(ST.hasStdExtZbb(), {sXLen})
      .minScalar(ST.hasStdExtZbb(), 0, sXLen)
      .lower();

  getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN})
      .legalFor(ST.hasStdExtZbb(), {sXLen})
      .minScalar(ST.hasStdExtZbb(), 0, sXLen)
      .lower();

  getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();

  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});

  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

  getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
      .lower();

  // FP Operations

  // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
  getActionDefinitionsBuilder(
      {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
      .legalFor(ST.hasStdExtF(), {s32})
      .legalFor(ST.hasStdExtD(), {s64})
      .legalFor(ST.hasStdExtZfh(), {s16})
      .libcallFor({s32, s64})
      .libcallFor(ST.is64Bit(), {s128});

  getActionDefinitionsBuilder({G_FNEG, G_FABS})
      .legalFor(ST.hasStdExtF(), {s32})
      .legalFor(ST.hasStdExtD(), {s64})
      .legalFor(ST.hasStdExtZfh(), {s16})
      .lowerFor({s32, s64, s128});

  getActionDefinitionsBuilder(G_FREM)
      .libcallFor({s32, s64})
      .libcallFor(ST.is64Bit(), {s128})
      .minScalar(0, s32)
      .scalarize(0);

  getActionDefinitionsBuilder(G_FCOPYSIGN)
      .legalFor(ST.hasStdExtF(), {{s32, s32}})
      .legalFor(ST.hasStdExtD(), {{s64, s64}, {s32, s64}, {s64, s32}})
      .legalFor(ST.hasStdExtZfh(), {{s16, s16}, {s16, s32}, {s32, s16}})
      .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}})
      .lower();

  // FIXME: Use Zfhmin.
  getActionDefinitionsBuilder(G_FPTRUNC)
      .legalFor(ST.hasStdExtD(), {{s32, s64}})
      .legalFor(ST.hasStdExtZfh(), {{s16, s32}})
      .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}})
      .libcallFor({{s32, s64}})
      .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}});
  getActionDefinitionsBuilder(G_FPEXT)
      .legalFor(ST.hasStdExtD(), {{s64, s32}})
      .legalFor(ST.hasStdExtZfh(), {{s32, s16}})
      .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}})
      .libcallFor({{s64, s32}})
      .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}});

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
      .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
      .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
      .clampScalar(0, sXLen, sXLen)
      .libcallFor({{sXLen, s32}, {sXLen, s64}})
      .libcallFor(ST.is64Bit(), {{sXLen, s128}});

  // TODO: Support vector version of G_IS_FPCLASS.
  getActionDefinitionsBuilder(G_IS_FPCLASS)
      .customFor(ST.hasStdExtF(), {{s1, s32}})
      .customFor(ST.hasStdExtD(), {{s1, s64}})
      .customFor(ST.hasStdExtZfh(), {{s1, s16}})
      .lowerFor({{s1, s32}, {s1, s64}});

  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor(ST.hasStdExtF(), {s32})
      .legalFor(ST.hasStdExtD(), {s64})
      .legalFor(ST.hasStdExtZfh(), {s16})
      .lowerFor({s32, s64, s128});

  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
      .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
      .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
      .customFor(ST.is64Bit() && ST.hasStdExtF(), {{s32, s32}})
      .customFor(ST.is64Bit() && ST.hasStdExtD(), {{s32, s64}})
      .customFor(ST.is64Bit() && ST.hasStdExtZfh(), {{s32, s16}})
      .widenScalarToNextPow2(0)
      .minScalar(0, s32)
      .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
      .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}) // FIXME RV32.
      .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}, {s128, s128}});

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalFor(ST.hasStdExtF(), {{s32, sXLen}})
      .legalFor(ST.hasStdExtD(), {{s64, sXLen}})
      .legalFor(ST.hasStdExtZfh(), {{s16, sXLen}})
      .widenScalarToNextPow2(1)
      // Promote to XLen if the operation is legal.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0].isScalar() && Query.Types[1].isScalar() &&
                   (Query.Types[1].getSizeInBits() < ST.getXLen()) &&
                   ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) ||
                    (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) ||
                    (ST.hasStdExtZfh() &&
                     Query.Types[0].getSizeInBits() == 16));
          },
          LegalizeMutations::changeTo(1, sXLen))
      // Otherwise only promote to s32 since we have si libcalls.
      .minScalar(1, s32)
      .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
      .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}) // FIXME RV32.
      .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}, {s128, s128}});

  // FIXME: We can do custom inline expansion like SelectionDAG.
  getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
                               G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_INTRINSIC_ROUNDEVEN})
      .legalFor(ST.hasStdExtZfa(), {s32})
      .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
      .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16})
      .libcallFor({s32, s64})
      .libcallFor(ST.is64Bit(), {s128});

  getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
      .legalFor(ST.hasStdExtZfa(), {s32})
      .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
      .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16});

  getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
                               G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
                               G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
                               G_FTANH})
      .libcallFor({s32, s64})
      .libcallFor(ST.is64Bit(), {s128});
  getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
      .libcallFor({{s32, s32}, {s64, s32}})
      .libcallFor(ST.is64Bit(), {s128, s32});

  getActionDefinitionsBuilder(G_VASTART).customFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
      // other than sXLen.
      .clampScalar(0, sXLen, sXLen)
      .lowerForCartesianProduct({sXLen, p0}, {p0});

  getActionDefinitionsBuilder(G_VSCALE)
      .clampScalar(0, sXLen, sXLen)
      .customFor({sXLen});

  auto &SplatActions =
      getActionDefinitionsBuilder(G_SPLAT_VECTOR)
          .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                       typeIs(1, sXLen)))
          .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1)));
  // Handle case of s64 element vectors on RV32. If the subtarget does not have
  // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
  // does have f64, then we don't know whether the type is an f64 or an i64,
  // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
  // depending on how the instructions it consumes are legalized. They are not
  // legalized yet since legalization is in reverse postorder, so we cannot
  // make the decision at this moment.
  if (XLen == 32) {
    if (ST.hasVInstructionsF64() && ST.hasStdExtD())
      SplatActions.legalIf(all(
          typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
    else if (ST.hasVInstructionsI64())
      SplatActions.customIf(all(
          typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
  }

  SplatActions.clampScalar(1, sXLen, sXLen);

  LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
    LLT DstTy = Query.Types[0];
    LLT SrcTy = Query.Types[1];
    return DstTy.getElementType() == LLT::scalar(1) &&
           DstTy.getElementCount().getKnownMinValue() >= 8 &&
           SrcTy.getElementCount().getKnownMinValue() >= 8;
  };
  getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
      // We don't have the ability to slide mask vectors down indexed by their
      // i1 elements; the smallest we can do is i8. Often we are able to
      // bitcast to equivalent i8 vectors.
      .bitcastIf(
          all(typeIsLegalBoolVec(0, BoolVecTys, ST),
              typeIsLegalBoolVec(1, BoolVecTys, ST), ExtractSubvecBitcastPred),
          [=](const LegalityQuery &Query) {
            LLT CastTy = LLT::vector(
                Query.Types[0].getElementCount().divideCoefficientBy(8), 8);
            return std::pair(0, CastTy);
          })
      .customIf(LegalityPredicates::any(
          all(typeIsLegalBoolVec(0, BoolVecTys, ST),
              typeIsLegalBoolVec(1, BoolVecTys, ST)),
          all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
              typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))));

  getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
      .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
                    typeIsLegalBoolVec(1, BoolVecTys, ST)))
      .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                    typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));

  getLegacyLegalizerInfo().computeTables();
  verify(*ST.getInstrInfo());
}

bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                           MachineInstr &MI) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    return false;
  case Intrinsic::vacopy: {
    // vacopy arguments must be legal because of the intrinsic signature.
    // No need to check here.
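    // The lowering below is effectively *dst_va_list = *src_va_list: a
    // pointer-sized load from the source va_list followed by a store to the
    // destination.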

    MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
    MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
    MachineFunction &MF = *MI.getMF();
    const DataLayout &DL = MIRBuilder.getDataLayout();
    LLVMContext &Ctx = MF.getFunction().getContext();

    Register DstLst = MI.getOperand(1).getReg();
    LLT PtrTy = MRI.getType(DstLst);

    // Load the source va_list
    Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
    MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
        MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment);
    auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO);

    // Store the result in the destination va_list
    MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
        MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, Alignment);
    MIRBuilder.buildStore(Tmp, DstLst, *StoreMMO);

    MI.eraseFromParent();
    return true;
  }
  }
}

bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
                                         MachineIRBuilder &MIRBuilder) const {
  // Stores the address of the VarArgsFrameIndex slot into the memory location
  assert(MI.getOpcode() == TargetOpcode::G_VASTART);
  MachineFunction *MF = MI.getParent()->getParent();
  RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
  int FI = FuncInfo->getVarArgsFrameIndex();
  LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg());
  auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI);
  assert(MI.hasOneMemOperand());
  MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(),
                        *MI.memoperands()[0]);
  MI.eraseFromParent();
  return true;
}

bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI,
                                      MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  auto &MF = *MI.getParent()->getParent();
  const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
  unsigned EntrySize = MJTI->getEntrySize(MF.getDataLayout());

  Register PtrReg = MI.getOperand(0).getReg();
  LLT PtrTy = MRI.getType(PtrReg);
  Register IndexReg = MI.getOperand(2).getReg();
  LLT IndexTy = MRI.getType(IndexReg);

  if (!isPowerOf2_32(EntrySize))
    return false;

  auto ShiftAmt = MIRBuilder.buildConstant(IndexTy, Log2_32(EntrySize));
  IndexReg = MIRBuilder.buildShl(IndexTy, IndexReg, ShiftAmt).getReg(0);

  auto Addr = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, IndexReg);

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getJumpTable(MF), MachineMemOperand::MOLoad,
      EntrySize, Align(MJTI->getEntryAlignment(MF.getDataLayout())));

  Register TargetReg;
  switch (MJTI->getEntryKind()) {
  default:
    return false;
  case MachineJumpTableInfo::EK_LabelDifference32: {
    // For PIC, the sequence is:
    // BRIND(load(Jumptable + index) + RelocBase)
    // RelocBase can be JumpTable, GOT or some sort of global base.
    unsigned LoadOpc =
        STI.is64Bit() ? TargetOpcode::G_SEXTLOAD : TargetOpcode::G_LOAD;
    auto Load = MIRBuilder.buildLoadInstr(LoadOpc, IndexTy, Addr, *MMO);
    TargetReg = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, Load).getReg(0);
    break;
  }
  case MachineJumpTableInfo::EK_Custom32: {
    auto Load = MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, IndexTy,
                                          Addr, *MMO);
    TargetReg = MIRBuilder.buildIntToPtr(PtrTy, Load).getReg(0);
    break;
  }
  case MachineJumpTableInfo::EK_BlockAddress:
    TargetReg = MIRBuilder.buildLoad(PtrTy, Addr, *MMO).getReg(0);
    break;
  }

  MIRBuilder.buildBrIndirect(TargetReg);

  MI.eraseFromParent();
  return true;
}

bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm,
                                                bool ShouldOptForSize) const {
  assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
  int64_t Imm = APImm.getSExtValue();
  // All simm32 constants should be handled by isel.
  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
  // this check redundant, but small immediates are common so this check
  // should have better compile time.
  if (isInt<32>(Imm))
    return false;

  // We only need to cost the immediate if constant pool lowering is enabled.
  if (!STI.useConstantPoolForLargeInts())
    return false;

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
  if (Seq.size() <= STI.getMaxBuildIntsCost())
    return false;

  // Optimizations below are disabled for opt size. If we're optimizing for
  // size, use a constant pool.
  if (ShouldOptForSize)
    return true;

  // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
  // do that if it will avoid a constant pool.
  // It will require an extra temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // low and high 32 bits are the same and bit 31 and 63 are set.
  unsigned ShiftAmt, AddOpc;
  RISCVMatInt::InstSeq SeqLo =
      RISCVMatInt::generateTwoRegInstSeq(Imm, STI, ShiftAmt, AddOpc);
  return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
}

bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
                                        MachineIRBuilder &MIB) const {
  const LLT XLenTy(STI.getXLenVT());
  Register Dst = MI.getOperand(0).getReg();

  // We define our scalable vector types for lmul=1 to use a 64 bit known
  // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
  // vscale as VLENB / 8.
  static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
  if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
    // Support for VLEN==32 is incomplete.
    return false;

  // We assume VLENB is a multiple of 8. We manually choose the best shift
  // here because SimplifyDemandedBits isn't always able to simplify it.
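  // For example: G_VSCALE 2 becomes VLENB >> 2, G_VSCALE 24 becomes VLENB * 3,
  // and G_VSCALE 5 becomes (VLENB >> 3) * 5.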
  uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
  if (isPowerOf2_64(Val)) {
    uint64_t Log2 = Log2_64(Val);
    if (Log2 < 3) {
      auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
      MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
    } else if (Log2 > 3) {
      auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
      MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
    } else {
      MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
    }
  } else if ((Val % 8) == 0) {
    // If the multiplier is a multiple of 8, scale it down to avoid needing
    // to shift the VLENB value.
    auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
    MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
  } else {
    auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
    auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
    MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
  }
  MI.eraseFromParent();
  return true;
}

// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
                                     MachineIRBuilder &MIB) const {

  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
         Opc == TargetOpcode::G_ANYEXT);

  MachineRegisterInfo &MRI = *MIB.getMRI();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
  LLT DstEltTy = DstTy.getElementType();
  auto SplatZero = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, 0));
  auto SplatTrue =
      MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, ExtTrueVal));
  MIB.buildSelect(Dst, Src, SplatTrue, SplatZero);

  MI.eraseFromParent();
  return true;
}

bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
                                           LegalizerHelper &Helper,
                                           MachineIRBuilder &MIB) const {
  assert((isa<GLoad>(MI) || isa<GStore>(MI)) &&
         "Machine instructions must be Load/Store.");
  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineFunction *MF = MI.getMF();
  const DataLayout &DL = MIB.getDataLayout();
  LLVMContext &Ctx = MF->getFunction().getContext();

  Register DstReg = MI.getOperand(0).getReg();
  LLT DataTy = MRI.getType(DstReg);
  if (!DataTy.isVector())
    return false;

  if (!MI.hasOneMemOperand())
    return false;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  const auto *TLI = STI.getTargetLowering();
  EVT VT = EVT::getEVT(getTypeForLLT(DataTy, Ctx));

  if (TLI->allowsMemoryAccessForAlignment(Ctx, DL, VT, *MMO))
    return true;

  unsigned EltSizeBits = DataTy.getScalarSizeInBits();
  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV load type");

  // Calculate the new vector type with i8 elements
  unsigned NumElements =
      DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
  LLT NewDataTy = LLT::scalable_vector(NumElements, 8);

  Helper.bitcast(MI, 0, NewDataTy);

  return true;
}

/// Return the mask type suitable for masking the provided vector type. This is
/// simply an i1 element type vector of the same (possibly scalable) length.
static LLT getMaskTypeFor(LLT VecTy) {
  assert(VecTy.isVector());
  ElementCount EC = VecTy.getElementCount();
  return LLT::vector(EC, LLT::scalar(1));
}

/// Creates an all ones mask suitable for masking a vector of type VecTy with
/// vector length VL.
static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
                                            MachineIRBuilder &MIB,
                                            MachineRegisterInfo &MRI) {
  LLT MaskTy = getMaskTypeFor(VecTy);
  return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
}

/// Gets the two common "VL" operands: an all-ones mask and the vector length.
/// VecTy is a scalable vector type.
static std::pair<MachineInstrBuilder, MachineInstrBuilder>
buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
  assert(VecTy.isScalableVector() && "Expecting scalable container type");
  const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
  LLT XLenTy(STI.getXLenVT());
  auto VL = MIB.buildConstant(XLenTy, -1);
  auto Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
  return {Mask, VL};
}

static MachineInstrBuilder
buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
                         Register Hi, const SrcOp &VL, MachineIRBuilder &MIB,
                         MachineRegisterInfo &MRI) {
  // TODO: If the Hi bits of the splat are undefined, then it's fine to just
  // splat Lo even if it might be sign extended. I don't think we have
  // introduced a case where we build an s64 where the upper bits are undef
  // yet.

  // Fall back to a stack store and stride x0 vector load.
  // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
  // preprocessDAG in SDAG.
  return MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst},
                        {Passthru, Lo, Hi, VL});
}

static MachineInstrBuilder
buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
                         const SrcOp &Scalar, const SrcOp &VL,
                         MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
  assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
  auto Unmerge = MIB.buildUnmerge(LLT::scalar(32), Scalar);
  return buildSplatPartsS64WithVL(Dst, Passthru, Unmerge.getReg(0),
                                  Unmerge.getReg(1), VL, MIB, MRI);
}

// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
// Splats of s1 types with a constant value can be legalized as VMSET_VL or
// VMCLR_VL.
bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
                                             MachineIRBuilder &MIB) const {
  assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);

  MachineRegisterInfo &MRI = *MIB.getMRI();

  Register Dst = MI.getOperand(0).getReg();
  Register SplatVal = MI.getOperand(1).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT XLenTy(STI.getXLenVT());

  // Handle case of s64 element vectors on rv32
  if (XLenTy.getSizeInBits() == 32 &&
      VecTy.getElementType().getSizeInBits() == 64) {
    auto [_, VL] = buildDefaultVLOps(MRI.getType(Dst), MIB, MRI);
    buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
                             MRI);
    MI.eraseFromParent();
    return true;
  }

  // All-zeros or all-ones splats are handled specially.
  MachineInstr &SplatValMI = *MRI.getVRegDef(SplatVal);
  if (isAllOnesOrAllOnesSplat(SplatValMI, MRI)) {
    auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
    MIB.buildInstr(RISCV::G_VMSET_VL, {Dst}, {VL});
    MI.eraseFromParent();
    return true;
  }
  if (isNullOrNullSplat(SplatValMI, MRI)) {
    auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
    MIB.buildInstr(RISCV::G_VMCLR_VL, {Dst}, {VL});
    MI.eraseFromParent();
    return true;
  }

  // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
  // ones) by promoting it to an s8 splat.
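  // The sequence below is roughly: splat(zext(x) & 1) != splat(0), i.e. the
  // i1 value is widened to i8, masked down to a single bit, splatted, and
  // compared against an all-zeros i8 splat to reconstitute the mask.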
  LLT InterEltTy = LLT::scalar(8);
  LLT InterTy = VecTy.changeElementType(InterEltTy);
  auto ZExtSplatVal = MIB.buildZExt(InterEltTy, SplatVal);
  auto And =
      MIB.buildAnd(InterEltTy, ZExtSplatVal, MIB.buildConstant(InterEltTy, 1));
  auto LHS = MIB.buildSplatVector(InterTy, And);
  auto ZeroSplat =
      MIB.buildSplatVector(InterTy, MIB.buildConstant(InterEltTy, 0));
  MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, LHS, ZeroSplat);
  MI.eraseFromParent();
  return true;
}

static LLT getLMUL1Ty(LLT VecTy) {
  assert(VecTy.getElementType().getSizeInBits() <= 64 &&
         "Unexpected vector LLT");
  return LLT::scalable_vector(RISCV::RVVBitsPerBlock /
                                  VecTy.getElementType().getSizeInBits(),
                              VecTy.getElementType());
}

bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
                                                  MachineIRBuilder &MIB) const {
  GExtractSubvector &ES = cast<GExtractSubvector>(MI);

  MachineRegisterInfo &MRI = *MIB.getMRI();

  Register Dst = ES.getReg(0);
  Register Src = ES.getSrcVec();
  uint64_t Idx = ES.getIndexImm();

  // With an index of 0 this is a cast-like subvector, which can be performed
  // with subregister operations.
  if (Idx == 0)
    return true;

  LLT LitTy = MRI.getType(Dst);
  LLT BigTy = MRI.getType(Src);

  if (LitTy.getElementType() == LLT::scalar(1)) {
    // We can't slide this mask vector up indexed by its i1 elements.
    // This poses a problem when we wish to insert a scalable vector which
    // can't be re-expressed as a larger type. Just choose the slow path and
    // extend to a larger type, then truncate back down.
    LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
    LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
    auto BigZExt = MIB.buildZExt(ExtBigTy, Src);
    auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
    auto SplatZero = MIB.buildSplatVector(
        ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
    MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
    MI.eraseFromParent();
    return true;
  }

  // extract_subvector scales the index by vscale if the subvector is scalable,
  // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
  const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
  MVT LitTyMVT = getMVTForLLT(LitTy);
  auto Decompose =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
  unsigned RemIdx = Decompose.second;

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return true;

  // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
  // was > M1 then the index would need to be a multiple of VLMAX, and so would
  // divide exactly.
  assert(
      RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(LitTyMVT)).second ||
      RISCVTargetLowering::getLMUL(LitTyMVT) == RISCVII::VLMUL::LMUL_1);

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type.
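  // e.g. when extracting from <vscale x 8 x i32>, the slide below operates on
  // a single LMUL=1 <vscale x 2 x i32> register rather than the whole register
  // group.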
  LLT InterLitTy = BigTy;
  Register Vec = Src;
  if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
                          getLMUL1Ty(BigTy).getSizeInBits())) {
    // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
    // we should have successfully decomposed the extract into a subregister.
    assert(Decompose.first != RISCV::NoSubRegister);
    InterLitTy = getLMUL1Ty(BigTy);
    // SDAG builds a TargetExtractSubreg. We cannot create a Copy with SubReg
    // specified on the source Register (the equivalent) since a generic
    // virtual register does not allow a subregister index.
    Vec = MIB.buildExtractSubvector(InterLitTy, Src, Idx - RemIdx).getReg(0);
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  const LLT XLenTy(STI.getXLenVT());
  auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx);
  auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
  uint64_t Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
  auto Slidedown = MIB.buildInstr(
      RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
      {MIB.buildUndef(InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  MIB.buildExtractSubvector(Dst, Slidedown, 0);

  MI.eraseFromParent();
  return true;
}

bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
                                                 LegalizerHelper &Helper,
                                                 MachineIRBuilder &MIB) const {
  GInsertSubvector &IS = cast<GInsertSubvector>(MI);

  MachineRegisterInfo &MRI = *MIB.getMRI();

  Register Dst = IS.getReg(0);
  Register BigVec = IS.getBigVec();
  Register LitVec = IS.getSubVec();
  uint64_t Idx = IS.getIndexImm();

  LLT BigTy = MRI.getType(BigVec);
  LLT LitTy = MRI.getType(LitVec);

  if (Idx == 0 ||
      MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
    return true;

  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
  // vectors and truncate down after the insert.
  if (LitTy.getElementType() == LLT::scalar(1)) {
    auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
    auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
    if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
      return Helper.bitcast(
          IS, 0,
          LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8));

    // We can't slide this mask vector up indexed by its i1 elements.
    // This poses a problem when we wish to insert a scalable vector which
    // can't be re-expressed as a larger type. Just choose the slow path and
    // extend to a larger type, then truncate back down.
    LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
    return Helper.widenScalar(IS, 0, ExtBigTy);
  }

  const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
  unsigned SubRegIdx, RemIdx;
  std::tie(SubRegIdx, RemIdx) =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);

  TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
  assert(isPowerOf2_64(
      STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
  bool ExactlyVecRegSized =
      STI.expandVScale(LitTy.getSizeInBits())
          .isKnownMultipleOf(STI.expandVScale(VecRegSize));

  // If the Idx has been completely eliminated and this subvector's size is a
  // vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  if (RemIdx == 0 && ExactlyVecRegSized)
    return true;

  // If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.

  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  const LLT XLenTy(STI.getXLenVT());
  LLT InterLitTy = BigTy;
  Register AlignedExtract = BigVec;
  unsigned AlignedIdx = Idx - RemIdx;
  if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
                          getLMUL1Ty(BigTy).getSizeInBits())) {
    InterLitTy = getLMUL1Ty(BigTy);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a G_EXTRACT on a subreg.
    AlignedExtract =
        MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
  }

  auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
                                         LitVec, 0);

  auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
  auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());

  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  MachineInstrBuilder Inserted;
  bool NeedInsertSubvec =
      TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits());
  Register InsertedDst =
      NeedInsertSubvec ? MRI.createGenericVirtualRegister(InterLitTy) : Dst;
  if (RemIdx == 0) {
    Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InsertedDst},
                              {AlignedExtract, Insert, VL});
  } else {
    auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
    // Construct the vector length corresponding to RemIdx + length(LitTy).
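    // (Under the undisturbed tail policy, elements at indices >= VL are left
    // as they were, so VL only needs to cover the end of the inserted
    // subvector.)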
    VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
    // Use tail agnostic policy if we're inserting over InterLitTy's tail.
    ElementCount EndIndex =
        ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
    uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
    if (STI.expandVScale(EndIndex) ==
        STI.expandVScale(InterLitTy.getElementCount()))
      Policy = RISCVII::TAIL_AGNOSTIC;

    Inserted =
        MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InsertedDst},
                       {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
  }

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (NeedInsertSubvec)
    MIB.buildInsertSubvector(Dst, BigVec, Inserted, AlignedIdx);

  MI.eraseFromParent();
  return true;
}

static unsigned getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case TargetOpcode::G_ASHR:
    return RISCV::G_SRAW;
  case TargetOpcode::G_LSHR:
    return RISCV::G_SRLW;
  case TargetOpcode::G_SHL:
    return RISCV::G_SLLW;
  case TargetOpcode::G_SDIV:
    return RISCV::G_DIVW;
  case TargetOpcode::G_UDIV:
    return RISCV::G_DIVUW;
  case TargetOpcode::G_UREM:
    return RISCV::G_REMUW;
  case TargetOpcode::G_ROTL:
    return RISCV::G_ROLW;
  case TargetOpcode::G_ROTR:
    return RISCV::G_RORW;
  case TargetOpcode::G_CTLZ:
    return RISCV::G_CLZW;
  case TargetOpcode::G_CTTZ:
    return RISCV::G_CTZW;
  case TargetOpcode::G_FPTOSI:
    return RISCV::G_FCVT_W_RV64;
  case TargetOpcode::G_FPTOUI:
    return RISCV::G_FCVT_WU_RV64;
  }
}

bool RISCVLegalizerInfo::legalizeCustom(
    LegalizerHelper &Helper, MachineInstr &MI,
    LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  MachineFunction &MF = *MI.getParent()->getParent();
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_ABS:
    return Helper.lowerAbsToMaxNeg(MI);
  // TODO: G_FCONSTANT
  case TargetOpcode::G_CONSTANT: {
    const Function &F = MF.getFunction();
    // TODO: if PSI and BFI are present, add " ||
    // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
    bool ShouldOptForSize = F.hasOptSize() || F.hasMinSize();
    const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
    if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
      return true;
    return Helper.lowerConstant(MI);
  }
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_ADD: {
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);

    Register DstALU = MRI.createGenericVirtualRegister(sXLen);

    MachineOperand &MO = MI.getOperand(0);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
    auto DstSext = MIRBuilder.buildSExtInReg(sXLen, DstALU, 32);

    MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {MO}, {DstSext});
    MO.setReg(DstALU);

    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_SEXT_INREG: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int64_t SizeInBits = MI.getOperand(2).getImm();
    // Source size of 32 is sext.w.
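    // With Zbb, sizes of 8 and 16 map to sext.b and sext.h below.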
    if (DstTy.getSizeInBits() == 64 && SizeInBits == 32)
      return true;

    if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16))
      return true;

    return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
           LegalizerHelper::Legalized;
  }
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_SHL: {
    if (getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
      // We don't need a custom node for shift by constant. Just widen the
      // source and the shift amount.
      unsigned ExtOpc = TargetOpcode::G_ANYEXT;
      if (MI.getOpcode() == TargetOpcode::G_ASHR)
        ExtOpc = TargetOpcode::G_SEXT;
      else if (MI.getOpcode() == TargetOpcode::G_LSHR)
        ExtOpc = TargetOpcode::G_ZEXT;

      Helper.Observer.changingInstr(MI);
      Helper.widenScalarSrc(MI, sXLen, 1, ExtOpc);
      Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ZEXT);
      Helper.widenScalarDst(MI, sXLen);
      Helper.Observer.changedInstr(MI);
      return true;
    }

    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_ROTL:
  case TargetOpcode::G_ROTR: {
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ: {
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    MI.addOperand(MachineOperand::CreateImm(RISCVFPRndMode::RTZ));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_IS_FPCLASS: {
    Register GISFPCLASS = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    const MachineOperand &ImmOp = MI.getOperand(2);
    MachineIRBuilder MIB(MI);

    // Map LLVM IR's floating-point class mask to the RISC-V fclass encoding
    // by simply rotating the 10-bit immediate right by two bits.
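    // (e.g. LLVM's fcNegInf, bit 2, rotates down to bit 0, the fclass bit for
    // -inf, while fcSNan and fcQNan, bits 0 and 1, wrap around to bits 8 and
    // 9.)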
    APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
    auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
    auto ConstZero = MIB.buildConstant(sXLen, 0);

    auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
    auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
    MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);

    MI.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_BRJT:
    return legalizeBRJT(MI, MIRBuilder);
  case TargetOpcode::G_VASTART:
    return legalizeVAStart(MI, MIRBuilder);
  case TargetOpcode::G_VSCALE:
    return legalizeVScale(MI, MIRBuilder);
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ANYEXT:
    return legalizeExt(MI, MIRBuilder);
  case TargetOpcode::G_SPLAT_VECTOR:
    return legalizeSplatVector(MI, MIRBuilder);
  case TargetOpcode::G_EXTRACT_SUBVECTOR:
    return legalizeExtractSubvector(MI, MIRBuilder);
  case TargetOpcode::G_INSERT_SUBVECTOR:
    return legalizeInsertSubvector(MI, Helper, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, Helper, MIRBuilder);
  }

  llvm_unreachable("expected switch to return");
}