//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.
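  // Loads of i1 are promoted to GRLenVT for every extension kind, and the
  // double-GRLen shift nodes (SHL/SRA/SRL_PARTS) are custom-lowered below.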

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.
  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
  setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.
  setOperationAction(ISD::BSWAP, MVT::i16, Custom);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);

  // Set operations for LA64 only.
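  // On LA64 only i64 is a legal integer type, so most 32-bit operations are
  // custom-lowered, typically so they can be selected to their *.W forms.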

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
                       Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    if (!Subtarget.hasBasicD()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.
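  // With 64-bit FP registers available, f64 receives essentially the same
  // action set as f32 above.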

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
      setOperationAction(ISD::BITREVERSE, VT, Custom);
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    }
    setOperationAction(ISD::CTPOP, GRLenVT, Legal);
    setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
  }

  // Set operations for 'LASX' feature.
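  // The 256-bit LASX configuration below mirrors the 128-bit LSX one; the
  // catch-all Expand default has already been applied to every fixed-length
  // vector type above.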

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    }
    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
      setOperationAction(ISD::BITREVERSE, VT, Custom);
    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    }
  }

  // Set DAG combine for LA32 and LA64.

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::SETCC);

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX())
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());

  // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
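  // LAMCAS provides native sub-word AMCAS instructions, and SCQ additionally
  // makes a 128-bit compare-and-swap possible.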
  if (Subtarget.hasLAMCAS())
    setMinCmpXchgSizeInBits(8);

  if (Subtarget.hasSCQ()) {
    setMaxAtomicSizeInBitsSupported(128);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
  }
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::BITREVERSE:
    return lowerBITREVERSE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  }
  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();

  SDValue Vector = DAG.getUNDEF(OpVT);
  SDValue Val = Op.getOperand(0);
  SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());

  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
}

SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT ResTy = Op->getValueType(0);
  SDValue Src = Op->getOperand(0);
  SDLoc DL(Op);

  EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
  unsigned int OrigEltNum = ResTy.getVectorNumElements();
  unsigned int NewEltNum = NewVT.getVectorNumElements();

  SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);

  SmallVector<SDValue, 8> Ops;
  for (unsigned int i = 0; i < NewEltNum; i++) {
    SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
                             DAG.getConstant(i, DL, MVT::i64));
    unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
                         ? (unsigned)LoongArchISD::BITREV_8B
                         : (unsigned)ISD::BITREVERSE;
    Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
  }
  SDValue Res =
      DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));

  switch (ResTy.getSimpleVT().SimpleTy) {
  default:
    return SDValue();
  case MVT::v16i8:
  case MVT::v32i8:
    return Res;
  case MVT::v8i16:
  case MVT::v16i16:
  case MVT::v4i32:
  case MVT::v8i32: {
    SmallVector<int, 32> Mask;
    for (unsigned int i = 0; i < NewEltNum; i++)
      for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
        Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
    return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
  }
  }
}

/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}

/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
///
/// VREPLVEI performs vector broadcast based on an element specified by an
/// integer immediate, with its mask being similar to:
///   <x, x, x, ...>
/// where x is any valid index.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
    APInt Imm(64, SplatIndex);
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(Imm, DL, MVT::i64));
  }

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
///
/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
/// elements according to a <4 x i2> constant (encoded as an integer immediate).
///
/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
///   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
/// When undef's appear they are treated as if they were whatever value is
/// necessary in order to fit the above forms.
///
/// For example:
///   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
///                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
///                                 i32 7, i32 6, i32 5, i32 4>
/// is lowered to:
///   (VSHUF4I_H $v0, $v1, 27)
/// where the 27 comes from:
///   3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  // When the size is less than 4, lower cost instructions may be used.
  if (Mask.size() < 4)
    return SDValue();

  int SubMask[4] = {-1, -1, -1, -1};
  for (unsigned i = 0; i < 4; ++i) {
    for (unsigned j = i; j < Mask.size(); j += 4) {
      int Idx = Mask[j];

      // Convert from vector index to 4-element subvector index
      // If an index refers to an element outside of the subvector then give up
      if (Idx != -1) {
        Idx -= 4 * (j / 4);
        if (Idx < 0 || Idx >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef
      if (SubMask[i] == -1)
        SubMask[i] = Idx;
      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up
      else if (Idx != -1 && Idx != SubMask[i])
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero
  APInt Imm(64, 0);
  for (int i = 3; i >= 0; --i) {
    int Idx = SubMask[i];

    if (Idx == -1)
      Idx = 0;

    Imm <<= 2;
    Imm |= Idx & 0x3;
  }

  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
                     DAG.getConstant(Imm, DL, MVT::i64));
}

/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
///
/// VPACKEV interleaves the even elements from each vector.
///
/// It is possible to lower into VPACKEV when the mask consists of two of the
/// following forms interleaved:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 2, 2, 4, 4, ...>
///   <0, n, 2, n+2, 4, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
///
/// VPACKOD interleaves the odd elements from each vector.
///
/// It is possible to lower into VPACKOD when the mask consists of two of the
/// following forms interleaved:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 1, 3, 3, 5, 5, ...>
///   <1, n+1, 3, n+3, 5, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VILVH (if possible).
///
/// VILVH interleaves consecutive elements from the left (highest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVH when the mask consists of two of the
/// following forms interleaved:
///   <x, x+1, x+2, ...>
///   <n+x, n+x+1, n+x+2, ...>
/// where n is the number of elements in the vector and x is half n.
/// For example:
///   <x, x, x+1, x+1, x+2, x+2, ...>
///   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
                                   1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VILVL (if possible).
///
/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVL when the mask consists of two of the
/// following forms interleaved:
///   <0, 1, 2, ...>
///   <n, n+1, n+2, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 1, 1, 2, 2, ...>
///   <0, n, 1, n+1, 2, n+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
///
/// VPICKEV copies the even elements of each vector into the result vector.
///
/// It is possible to lower into VPICKEV when the mask consists of two of the
/// following forms concatenated:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 2, 4, ..., 0, 2, 4, ...>
///   <0, 2, 4, ..., n, n+2, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
///
/// VPICKOD copies the odd elements of each vector into the result vector.
///
/// It is possible to lower into VPICKOD when the mask consists of two of the
/// following forms concatenated:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 3, 5, ..., 1, 3, 5, ...>
///   <1, 3, 5, ..., n+1, n+3, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VSHUF.
///
/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
/// adding it as an operand to the resulting VSHUF.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  SmallVector<SDValue, 16> Ops;
  for (auto M : Mask)
    Ops.push_back(DAG.getConstant(M, DL, MVT::i64));

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  //   <0b00, 0b01> + <0b10, 0b11> ->
  //   0b0100       + 0b1110       -> 0b01001110
  //                                  <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}

/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
  assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
          VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
          VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
         "Vector type is unsupported for lsx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");

  SDValue Result;
  // TODO: Add more comparison patterns.
  if (V2.isUndef()) {
    if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
      return Result;

    // TODO: This comment may be enabled in the future to better match the
    // pattern for instruction selection.
    /* V2 = V1; */
  }

  // It is recommended not to change the pattern comparison order for better
  // performance.
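  // Each helper returns SDValue() when the mask does not fit its pattern, so
  // lowering falls through to the generic VSHUF form as a last resort.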
  if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
    return Result;

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
///
/// It is a XVREPLVEI when the mask is:
///   <x, x, x, ..., x+n, x+n, x+n, ...>
/// where the number of x is equal to n and n is half the length of vector.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
                                             ArrayRef<int> Mask, MVT VT,
                                             SDValue V1, SDValue V2,
                                             SelectionDAG &DAG) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
      fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
                              0)) {
    APInt Imm(64, SplatIndex);
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(Imm, DL, MVT::i64));
  }

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  // When the size is less than or equal to 4, lower cost instructions may be
  // used.
  if (Mask.size() <= 4)
    return SDValue();
  return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
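///
/// The 256-bit LASX interleave operates on each 128-bit lane independently, so
/// the pattern is checked separately on each half of the mask.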
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  unsigned LeftSize = HalfSize / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
                              1) &&
      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
                                   Mask.size() + HalfSize - LeftSize, 1) &&
           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
                                   Mask.size() + HalfSize + LeftSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
                              1) &&
      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
                              1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
                                   Mask.size() + HalfSize - LeftSize, 1) &&
           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
                                   Mask.size() + HalfSize + LeftSize, 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
                                   Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
                                   1) &&
           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
                                   Mask.size() + HalfSize, 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
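///
/// Compared with VPICKEV, the mask is examined in quarters (Begin, LeftMid,
/// Mid, RightMid, End) because each 128-bit lane picks its even elements
/// independently.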
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
      fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
                                   2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
      fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
                                   2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
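///
/// The shuffle mask is rewritten into the per-half operand encoding that
/// XVSHUF expects; see the block comment inside the function for the layout
/// difference versus VECTOR_SHUFFLE.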
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;
  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + HalfSize;
  const auto &End = Mask.end();

  // VECTOR_SHUFFLE concatenates the vectors:
  //   <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
  //  shuffling ->
  //   <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
  //
  // XVSHUF concatenates the vectors:
  //   <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
  //  shuffling ->
  //   <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
  SmallVector<SDValue, 8> MaskAlloc;
  for (auto it = Begin; it < Mid; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
    else if ((*it >= 0 && *it < HalfSize) ||
             (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
      int M = *it < HalfSize ? *it : *it - HalfSize;
      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");

  for (auto it = Mid; it < End; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
    else if ((*it >= HalfSize && *it < MaskSize) ||
             (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
      int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}

/// Shuffle vectors by lane to generate more optimized instructions.
/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
///
/// Therefore, except for the following four cases, all other cases are
/// regarded as cross-lane shuffles, where optimization is relatively limited.
///
/// - Shuffle high, low lanes of two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
/// - Shuffle low, high lanes of two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
/// - Shuffle low, low lanes of two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
/// - Shuffle high, high lanes of two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
///
/// The first case is the closest to LoongArch instructions and the other
/// cases need to be converted to it for processing.
///
/// This function may modify V1, V2 and Mask.
static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
                                            MutableArrayRef<int> Mask, MVT VT,
                                            SDValue &V1, SDValue &V2,
                                            SelectionDAG &DAG) {

  enum HalfMaskType { HighLaneTy, LowLaneTy, None };

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;

  HalfMaskType preMask = None, postMask = None;

  if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    preMask = HighLaneTy;
  else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    preMask = LowLaneTy;

  if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    postMask = HighLaneTy;
  else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    postMask = LowLaneTy;

  // The pre-half of the mask is high-lane type and the post-half is low-lane
  // type, which is the case closest to the LoongArch instructions.
  //
  // Note: In the LoongArch architecture, the high lane of the mask corresponds
  // to the lower 128 bits of the vector register, and the low lane of the mask
  // corresponds to the higher 128 bits of the vector register.
  if (preMask == HighLaneTy && postMask == LowLaneTy) {
    return;
  }
  if (preMask == LowLaneTy && postMask == HighLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b01001110, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b01001110, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
      *it = *it < 0 ? *it : *it - HalfSize;
    }
    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
      *it = *it < 0 ? *it : *it + HalfSize;
    }
  } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b11101110, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b11101110, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
      *it = *it < 0 ? *it : *it - HalfSize;
    }
  } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b01000100, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b01000100, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
      *it = *it < 0 ? *it : *it + HalfSize;
    }
  } else { // cross-lane
    return;
  }
}

/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 256-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
  assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
          VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
          VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
         "Vector type is unsupported for lasx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");
  assert(Mask.size() >= 4 && "Mask size is less than 4.");

  // Canonicalize non-cross-lane shuffle vectors.
  SmallVector<int> NewMask(Mask);
  canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);

  SDValue Result;
  // TODO: Add more comparison patterns.
  if (V2.isUndef()) {
    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
      return Result;

    // TODO: This comment may be enabled in the future to better match the
    // pattern for instruction selection.
    /* V2 = V1; */
  }

  // It is recommended not to change the pattern comparison order for better
  // performance.
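  // As in the 128-bit case, each helper bails out on a mismatch and XVSHUF
  // acts as the catch-all at the end of the chain.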
  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
    return Result;

  return SDValue();
}

SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  ArrayRef<int> OrigMask = SVOp->getMask();
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  int NumElements = VT.getVectorNumElements();
  SDLoc DL(Op);

  bool V1IsUndef = V1.isUndef();
  bool V2IsUndef = V2.isUndef();
  if (V1IsUndef && V2IsUndef)
    return DAG.getUNDEF(VT);

  // When we create a shuffle node we put the UNDEF node as the second operand,
  // but in some cases the first operand may be transformed to UNDEF.
  // In that case we should just commute the node.
  if (V1IsUndef)
    return DAG.getCommutedVectorShuffle(*SVOp);

  // Check for non-undef masks pointing at an undef vector and make the masks
  // undef as well. This makes it easier to match the shuffle based solely on
  // the mask.
  if (V2IsUndef &&
      any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
    SmallVector<int, 8> NewMask(OrigMask);
    for (int &M : NewMask)
      if (M >= NumElements)
        M = -1;
    return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
  }

  // Check for illegal shuffle mask element index values.
  int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
  (void)MaskUpperLimit;
  assert(llvm::all_of(OrigMask,
                      [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
         "Out of bounds shuffle index");

  // For each vector width, delegate to a specialized lowering routine.
  if (VT.is128BitVector())
    return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);

  if (VT.is256BitVector())
    return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);

  return SDValue();
}

static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations.
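    // Each operand is inserted in turn into an initially undef vector.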
1521 EVT ResTy = Node->getValueType(0); 1522 1523 assert(ResTy.isVector()); 1524 1525 unsigned NumElts = ResTy.getVectorNumElements(); 1526 SDValue Vector = DAG.getUNDEF(ResTy); 1527 for (unsigned i = 0; i < NumElts; ++i) { 1528 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, 1529 Node->getOperand(i), 1530 DAG.getConstant(i, DL, Subtarget.getGRLenVT())); 1531 } 1532 return Vector; 1533 } 1534 1535 return SDValue(); 1536 } 1537 1538 SDValue 1539 LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 1540 SelectionDAG &DAG) const { 1541 EVT VecTy = Op->getOperand(0)->getValueType(0); 1542 SDValue Idx = Op->getOperand(1); 1543 EVT EltTy = VecTy.getVectorElementType(); 1544 unsigned NumElts = VecTy.getVectorNumElements(); 1545 1546 if (isa<ConstantSDNode>(Idx) && 1547 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || 1548 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2)) 1549 return Op; 1550 1551 return SDValue(); 1552 } 1553 1554 SDValue 1555 LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 1556 SelectionDAG &DAG) const { 1557 if (isa<ConstantSDNode>(Op->getOperand(2))) 1558 return Op; 1559 return SDValue(); 1560 } 1561 1562 SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, 1563 SelectionDAG &DAG) const { 1564 SDLoc DL(Op); 1565 SyncScope::ID FenceSSID = 1566 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); 1567 1568 // singlethread fences only synchronize with signal handlers on the same 1569 // thread and thus only need to preserve instruction order, not actually 1570 // enforce memory ordering. 1571 if (FenceSSID == SyncScope::SingleThread) 1572 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 1573 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); 1574 1575 return Op; 1576 } 1577 1578 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, 1579 SelectionDAG &DAG) const { 1580 1581 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) { 1582 DAG.getContext()->emitError( 1583 "On LA64, only 64-bit registers can be written."); 1584 return Op.getOperand(0); 1585 } 1586 1587 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) { 1588 DAG.getContext()->emitError( 1589 "On LA32, only 32-bit registers can be written."); 1590 return Op.getOperand(0); 1591 } 1592 1593 return Op; 1594 } 1595 1596 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op, 1597 SelectionDAG &DAG) const { 1598 if (!isa<ConstantSDNode>(Op.getOperand(0))) { 1599 DAG.getContext()->emitError("argument to '__builtin_frame_address' must " 1600 "be a constant integer"); 1601 return SDValue(); 1602 } 1603 1604 MachineFunction &MF = DAG.getMachineFunction(); 1605 MF.getFrameInfo().setFrameAddressIsTaken(true); 1606 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF); 1607 EVT VT = Op.getValueType(); 1608 SDLoc DL(Op); 1609 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 1610 unsigned Depth = Op.getConstantOperandVal(0); 1611 int GRLenInBytes = Subtarget.getGRLen() / 8; 1612 1613 while (Depth--) { 1614 int Offset = -(GRLenInBytes * 2); 1615 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 1616 DAG.getSignedConstant(Offset, DL, VT)); 1617 FrameAddr = 1618 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 1619 } 1620 return FrameAddr; 1621 } 1622 1623 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, 1624 SelectionDAG &DAG) const { 1625 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 1626 
return SDValue(); 1627 1628 // Currently only support lowering return address for current frame. 1629 if (Op.getConstantOperandVal(0) != 0) { 1630 DAG.getContext()->emitError( 1631 "return address can only be determined for the current frame"); 1632 return SDValue(); 1633 } 1634 1635 MachineFunction &MF = DAG.getMachineFunction(); 1636 MF.getFrameInfo().setReturnAddressIsTaken(true); 1637 MVT GRLenVT = Subtarget.getGRLenVT(); 1638 1639 // Return the value of the return address register, marking it an implicit 1640 // live-in. 1641 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(), 1642 getRegClassFor(GRLenVT)); 1643 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT); 1644 } 1645 1646 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, 1647 SelectionDAG &DAG) const { 1648 MachineFunction &MF = DAG.getMachineFunction(); 1649 auto Size = Subtarget.getGRLen() / 8; 1650 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false); 1651 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 1652 } 1653 1654 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, 1655 SelectionDAG &DAG) const { 1656 MachineFunction &MF = DAG.getMachineFunction(); 1657 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>(); 1658 1659 SDLoc DL(Op); 1660 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1661 getPointerTy(MF.getDataLayout())); 1662 1663 // vastart just stores the address of the VarArgsFrameIndex slot into the 1664 // memory location argument. 1665 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1666 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 1667 MachinePointerInfo(SV)); 1668 } 1669 1670 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, 1671 SelectionDAG &DAG) const { 1672 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && 1673 !Subtarget.hasBasicD() && "unexpected target features"); 1674 1675 SDLoc DL(Op); 1676 SDValue Op0 = Op.getOperand(0); 1677 if (Op0->getOpcode() == ISD::AND) { 1678 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1)); 1679 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF)) 1680 return Op; 1681 } 1682 1683 if (Op0->getOpcode() == LoongArchISD::BSTRPICK && 1684 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) && 1685 Op0.getConstantOperandVal(2) == UINT64_C(0)) 1686 return Op; 1687 1688 if (Op0.getOpcode() == ISD::AssertZext && 1689 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32)) 1690 return Op; 1691 1692 EVT OpVT = Op0.getValueType(); 1693 EVT RetVT = Op.getValueType(); 1694 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT); 1695 MakeLibCallOptions CallOptions; 1696 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true); 1697 SDValue Chain = SDValue(); 1698 SDValue Result; 1699 std::tie(Result, Chain) = 1700 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain); 1701 return Result; 1702 } 1703 1704 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, 1705 SelectionDAG &DAG) const { 1706 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && 1707 !Subtarget.hasBasicD() && "unexpected target features"); 1708 1709 SDLoc DL(Op); 1710 SDValue Op0 = Op.getOperand(0); 1711 1712 if ((Op0.getOpcode() == ISD::AssertSext || 1713 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) && 1714 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32)) 1715 return Op; 1716 1717 EVT OpVT = Op0.getValueType(); 1718 EVT RetVT = Op.getValueType(); 1719 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT); 1720 
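  // The operand could not be proven to fit in 32 bits, so the conversion is
  // softened into a libcall; for an i64 source and an f32 result this ends up
  // as __floatdisf.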
MakeLibCallOptions CallOptions; 1721 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true); 1722 SDValue Chain = SDValue(); 1723 SDValue Result; 1724 std::tie(Result, Chain) = 1725 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain); 1726 return Result; 1727 } 1728 1729 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, 1730 SelectionDAG &DAG) const { 1731 1732 SDLoc DL(Op); 1733 SDValue Op0 = Op.getOperand(0); 1734 1735 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 && 1736 Subtarget.is64Bit() && Subtarget.hasBasicF()) { 1737 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 1738 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0); 1739 } 1740 return Op; 1741 } 1742 1743 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, 1744 SelectionDAG &DAG) const { 1745 1746 SDLoc DL(Op); 1747 SDValue Op0 = Op.getOperand(0); 1748 1749 if (Op0.getValueType() == MVT::f16) 1750 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0); 1751 1752 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && 1753 !Subtarget.hasBasicD()) { 1754 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0); 1755 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst); 1756 } 1757 1758 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); 1759 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0); 1760 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc); 1761 } 1762 1763 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 1764 SelectionDAG &DAG, unsigned Flags) { 1765 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 1766 } 1767 1768 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 1769 SelectionDAG &DAG, unsigned Flags) { 1770 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 1771 Flags); 1772 } 1773 1774 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 1775 SelectionDAG &DAG, unsigned Flags) { 1776 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 1777 N->getOffset(), Flags); 1778 } 1779 1780 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 1781 SelectionDAG &DAG, unsigned Flags) { 1782 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 1783 } 1784 1785 template <class NodeTy> 1786 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 1787 CodeModel::Model M, 1788 bool IsLocal) const { 1789 SDLoc DL(N); 1790 EVT Ty = getPointerTy(DAG.getDataLayout()); 1791 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 1792 SDValue Load; 1793 1794 switch (M) { 1795 default: 1796 report_fatal_error("Unsupported code model"); 1797 1798 case CodeModel::Large: { 1799 assert(Subtarget.is64Bit() && "Large code model requires LA64"); 1800 1801 // This is not actually used, but is necessary for successfully matching 1802 // the PseudoLA_*_LARGE nodes. 1803 SDValue Tmp = DAG.getConstant(0, DL, Ty); 1804 if (IsLocal) { 1805 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that 1806 // eventually becomes the desired 5-insn code sequence. 1807 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty, 1808 Tmp, Addr), 1809 0); 1810 } else { 1811 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that 1812 // eventually becomes the desired 5-insn code sequence. 
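      // (Per the LoongArch ELF psABI, the large-model GOT sequence is
      // pcalau12i %got_pc_hi20 / addi.d %got_pc_lo12 / lu32i.d %got64_pc_lo20
      // / lu52i.d %got64_pc_hi12, followed by an ldx.d of the GOT entry.)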
1813 Load = SDValue( 1814 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr), 1815 0); 1816 } 1817 break; 1818 } 1819 1820 case CodeModel::Small: 1821 case CodeModel::Medium: 1822 if (IsLocal) { 1823 // This generates the pattern (PseudoLA_PCREL sym), which expands to 1824 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). 1825 Load = SDValue( 1826 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0); 1827 } else { 1828 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d 1829 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). 1830 Load = 1831 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0); 1832 } 1833 } 1834 1835 if (!IsLocal) { 1836 // Mark the load instruction as invariant to enable hoisting in MachineLICM. 1837 MachineFunction &MF = DAG.getMachineFunction(); 1838 MachineMemOperand *MemOp = MF.getMachineMemOperand( 1839 MachinePointerInfo::getGOT(MF), 1840 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 1841 MachineMemOperand::MOInvariant, 1842 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 1843 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); 1844 } 1845 1846 return Load; 1847 } 1848 1849 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, 1850 SelectionDAG &DAG) const { 1851 return getAddr(cast<BlockAddressSDNode>(Op), DAG, 1852 DAG.getTarget().getCodeModel()); 1853 } 1854 1855 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op, 1856 SelectionDAG &DAG) const { 1857 return getAddr(cast<JumpTableSDNode>(Op), DAG, 1858 DAG.getTarget().getCodeModel()); 1859 } 1860 1861 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, 1862 SelectionDAG &DAG) const { 1863 return getAddr(cast<ConstantPoolSDNode>(Op), DAG, 1864 DAG.getTarget().getCodeModel()); 1865 } 1866 1867 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, 1868 SelectionDAG &DAG) const { 1869 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1870 assert(N->getOffset() == 0 && "unexpected offset in global node"); 1871 auto CM = DAG.getTarget().getCodeModel(); 1872 const GlobalValue *GV = N->getGlobal(); 1873 1874 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) { 1875 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel()) 1876 CM = *GCM; 1877 } 1878 1879 return getAddr(N, DAG, CM, GV->isDSOLocal()); 1880 } 1881 1882 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 1883 SelectionDAG &DAG, 1884 unsigned Opc, bool UseGOT, 1885 bool Large) const { 1886 SDLoc DL(N); 1887 EVT Ty = getPointerTy(DAG.getDataLayout()); 1888 MVT GRLenVT = Subtarget.getGRLenVT(); 1889 1890 // This is not actually used, but is necessary for successfully matching the 1891 // PseudoLA_*_LARGE nodes. 1892 SDValue Tmp = DAG.getConstant(0, DL, Ty); 1893 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0); 1894 1895 // Only IE needs an extra argument for large code model. 1896 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE 1897 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0) 1898 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); 1899 1900 // If it is LE for normal/medium code model, the add tp operation will occur 1901 // during the pseudo-instruction expansion. 1902 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large) 1903 return Offset; 1904 1905 if (UseGOT) { 1906 // Mark the load instruction as invariant to enable hoisting in MachineLICM. 
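    // The GOT entry holds the symbol's TP-relative offset; it is filled in by
    // the dynamic linker before execution and never changes afterwards, so
    // treating the load as invariant and dereferenceable is safe.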
1907 MachineFunction &MF = DAG.getMachineFunction(); 1908 MachineMemOperand *MemOp = MF.getMachineMemOperand( 1909 MachinePointerInfo::getGOT(MF), 1910 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 1911 MachineMemOperand::MOInvariant, 1912 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 1913 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp}); 1914 } 1915 1916 // Add the thread pointer. 1917 return DAG.getNode(ISD::ADD, DL, Ty, Offset, 1918 DAG.getRegister(LoongArch::R2, GRLenVT)); 1919 } 1920 1921 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 1922 SelectionDAG &DAG, 1923 unsigned Opc, 1924 bool Large) const { 1925 SDLoc DL(N); 1926 EVT Ty = getPointerTy(DAG.getDataLayout()); 1927 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 1928 1929 // This is not actually used, but is necessary for successfully matching the 1930 // PseudoLA_*_LARGE nodes. 1931 SDValue Tmp = DAG.getConstant(0, DL, Ty); 1932 1933 // Use a PC-relative addressing mode to access the dynamic GOT address. 1934 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0); 1935 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0) 1936 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); 1937 1938 // Prepare argument list to generate call. 1939 ArgListTy Args; 1940 ArgListEntry Entry; 1941 Entry.Node = Load; 1942 Entry.Ty = CallTy; 1943 Args.push_back(Entry); 1944 1945 // Setup call to __tls_get_addr. 1946 TargetLowering::CallLoweringInfo CLI(DAG); 1947 CLI.setDebugLoc(DL) 1948 .setChain(DAG.getEntryNode()) 1949 .setLibCallee(CallingConv::C, CallTy, 1950 DAG.getExternalSymbol("__tls_get_addr", Ty), 1951 std::move(Args)); 1952 1953 return LowerCallTo(CLI).first; 1954 } 1955 1956 SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, 1957 SelectionDAG &DAG, unsigned Opc, 1958 bool Large) const { 1959 SDLoc DL(N); 1960 EVT Ty = getPointerTy(DAG.getDataLayout()); 1961 const GlobalValue *GV = N->getGlobal(); 1962 1963 // This is not actually used, but is necessary for successfully matching the 1964 // PseudoLA_*_LARGE nodes. 1965 SDValue Tmp = DAG.getConstant(0, DL, Ty); 1966 1967 // Use a PC-relative addressing mode to access the global dynamic GOT address. 1968 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym). 1969 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 1970 return Large ? 
SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0) 1971 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); 1972 } 1973 1974 SDValue 1975 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, 1976 SelectionDAG &DAG) const { 1977 if (DAG.getMachineFunction().getFunction().getCallingConv() == 1978 CallingConv::GHC) 1979 report_fatal_error("In GHC calling convention TLS is not supported"); 1980 1981 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large; 1982 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64"); 1983 1984 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 1985 assert(N->getOffset() == 0 && "unexpected offset in global node"); 1986 1987 if (DAG.getTarget().useEmulatedTLS()) 1988 report_fatal_error("the emulated TLS is prohibited", 1989 /*GenCrashDiag=*/false); 1990 1991 bool IsDesc = DAG.getTarget().useTLSDESC(); 1992 1993 switch (getTargetMachine().getTLSModel(N->getGlobal())) { 1994 case TLSModel::GeneralDynamic: 1995 // In this model, application code calls the dynamic linker function 1996 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at 1997 // runtime. 1998 if (!IsDesc) 1999 return getDynamicTLSAddr(N, DAG, 2000 Large ? LoongArch::PseudoLA_TLS_GD_LARGE 2001 : LoongArch::PseudoLA_TLS_GD, 2002 Large); 2003 break; 2004 case TLSModel::LocalDynamic: 2005 // Same as GeneralDynamic, except for assembly modifiers and relocation 2006 // records. 2007 if (!IsDesc) 2008 return getDynamicTLSAddr(N, DAG, 2009 Large ? LoongArch::PseudoLA_TLS_LD_LARGE 2010 : LoongArch::PseudoLA_TLS_LD, 2011 Large); 2012 break; 2013 case TLSModel::InitialExec: 2014 // This model uses the GOT to resolve TLS offsets. 2015 return getStaticTLSAddr(N, DAG, 2016 Large ? LoongArch::PseudoLA_TLS_IE_LARGE 2017 : LoongArch::PseudoLA_TLS_IE, 2018 /*UseGOT=*/true, Large); 2019 case TLSModel::LocalExec: 2020 // This model is used when static linking as the TLS offsets are resolved 2021 // during program linking. 2022 // 2023 // This node doesn't need an extra argument for the large code model. 2024 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE, 2025 /*UseGOT=*/false, Large); 2026 } 2027 2028 return getTLSDescAddr(N, DAG, 2029 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE 2030 : LoongArch::PseudoLA_TLS_DESC, 2031 Large); 2032 } 2033 2034 template <unsigned N> 2035 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, 2036 SelectionDAG &DAG, bool IsSigned = false) { 2037 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp)); 2038 // Check the ImmArg. 2039 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || 2040 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { 2041 DAG.getContext()->emitError(Op->getOperationName(0) + 2042 ": argument out of range."); 2043 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType()); 2044 } 2045 return SDValue(); 2046 } 2047 2048 SDValue 2049 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 2050 SelectionDAG &DAG) const { 2051 SDLoc DL(Op); 2052 switch (Op.getConstantOperandVal(0)) { 2053 default: 2054 return SDValue(); // Don't custom lower most intrinsics. 
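  // Most of the cases below only range-check the immediate operand of an
  // LSX/LASX intrinsic: checkIntrinsicImmArg<N> emits a diagnostic and
  // returns UNDEF when the immediate does not fit in its N-bit field, and
  // returns an empty SDValue otherwise so the intrinsic is selected as usual.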
2055 case Intrinsic::thread_pointer: { 2056 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2057 return DAG.getRegister(LoongArch::R2, PtrVT); 2058 } 2059 case Intrinsic::loongarch_lsx_vpickve2gr_d: 2060 case Intrinsic::loongarch_lsx_vpickve2gr_du: 2061 case Intrinsic::loongarch_lsx_vreplvei_d: 2062 case Intrinsic::loongarch_lasx_xvrepl128vei_d: 2063 return checkIntrinsicImmArg<1>(Op, 2, DAG); 2064 case Intrinsic::loongarch_lsx_vreplvei_w: 2065 case Intrinsic::loongarch_lasx_xvrepl128vei_w: 2066 case Intrinsic::loongarch_lasx_xvpickve2gr_d: 2067 case Intrinsic::loongarch_lasx_xvpickve2gr_du: 2068 case Intrinsic::loongarch_lasx_xvpickve_d: 2069 case Intrinsic::loongarch_lasx_xvpickve_d_f: 2070 return checkIntrinsicImmArg<2>(Op, 2, DAG); 2071 case Intrinsic::loongarch_lasx_xvinsve0_d: 2072 return checkIntrinsicImmArg<2>(Op, 3, DAG); 2073 case Intrinsic::loongarch_lsx_vsat_b: 2074 case Intrinsic::loongarch_lsx_vsat_bu: 2075 case Intrinsic::loongarch_lsx_vrotri_b: 2076 case Intrinsic::loongarch_lsx_vsllwil_h_b: 2077 case Intrinsic::loongarch_lsx_vsllwil_hu_bu: 2078 case Intrinsic::loongarch_lsx_vsrlri_b: 2079 case Intrinsic::loongarch_lsx_vsrari_b: 2080 case Intrinsic::loongarch_lsx_vreplvei_h: 2081 case Intrinsic::loongarch_lasx_xvsat_b: 2082 case Intrinsic::loongarch_lasx_xvsat_bu: 2083 case Intrinsic::loongarch_lasx_xvrotri_b: 2084 case Intrinsic::loongarch_lasx_xvsllwil_h_b: 2085 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: 2086 case Intrinsic::loongarch_lasx_xvsrlri_b: 2087 case Intrinsic::loongarch_lasx_xvsrari_b: 2088 case Intrinsic::loongarch_lasx_xvrepl128vei_h: 2089 case Intrinsic::loongarch_lasx_xvpickve_w: 2090 case Intrinsic::loongarch_lasx_xvpickve_w_f: 2091 return checkIntrinsicImmArg<3>(Op, 2, DAG); 2092 case Intrinsic::loongarch_lasx_xvinsve0_w: 2093 return checkIntrinsicImmArg<3>(Op, 3, DAG); 2094 case Intrinsic::loongarch_lsx_vsat_h: 2095 case Intrinsic::loongarch_lsx_vsat_hu: 2096 case Intrinsic::loongarch_lsx_vrotri_h: 2097 case Intrinsic::loongarch_lsx_vsllwil_w_h: 2098 case Intrinsic::loongarch_lsx_vsllwil_wu_hu: 2099 case Intrinsic::loongarch_lsx_vsrlri_h: 2100 case Intrinsic::loongarch_lsx_vsrari_h: 2101 case Intrinsic::loongarch_lsx_vreplvei_b: 2102 case Intrinsic::loongarch_lasx_xvsat_h: 2103 case Intrinsic::loongarch_lasx_xvsat_hu: 2104 case Intrinsic::loongarch_lasx_xvrotri_h: 2105 case Intrinsic::loongarch_lasx_xvsllwil_w_h: 2106 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: 2107 case Intrinsic::loongarch_lasx_xvsrlri_h: 2108 case Intrinsic::loongarch_lasx_xvsrari_h: 2109 case Intrinsic::loongarch_lasx_xvrepl128vei_b: 2110 return checkIntrinsicImmArg<4>(Op, 2, DAG); 2111 case Intrinsic::loongarch_lsx_vsrlni_b_h: 2112 case Intrinsic::loongarch_lsx_vsrani_b_h: 2113 case Intrinsic::loongarch_lsx_vsrlrni_b_h: 2114 case Intrinsic::loongarch_lsx_vsrarni_b_h: 2115 case Intrinsic::loongarch_lsx_vssrlni_b_h: 2116 case Intrinsic::loongarch_lsx_vssrani_b_h: 2117 case Intrinsic::loongarch_lsx_vssrlni_bu_h: 2118 case Intrinsic::loongarch_lsx_vssrani_bu_h: 2119 case Intrinsic::loongarch_lsx_vssrlrni_b_h: 2120 case Intrinsic::loongarch_lsx_vssrarni_b_h: 2121 case Intrinsic::loongarch_lsx_vssrlrni_bu_h: 2122 case Intrinsic::loongarch_lsx_vssrarni_bu_h: 2123 case Intrinsic::loongarch_lasx_xvsrlni_b_h: 2124 case Intrinsic::loongarch_lasx_xvsrani_b_h: 2125 case Intrinsic::loongarch_lasx_xvsrlrni_b_h: 2126 case Intrinsic::loongarch_lasx_xvsrarni_b_h: 2127 case Intrinsic::loongarch_lasx_xvssrlni_b_h: 2128 case Intrinsic::loongarch_lasx_xvssrani_b_h: 2129 case 
Intrinsic::loongarch_lasx_xvssrlni_bu_h: 2130 case Intrinsic::loongarch_lasx_xvssrani_bu_h: 2131 case Intrinsic::loongarch_lasx_xvssrlrni_b_h: 2132 case Intrinsic::loongarch_lasx_xvssrarni_b_h: 2133 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: 2134 case Intrinsic::loongarch_lasx_xvssrarni_bu_h: 2135 return checkIntrinsicImmArg<4>(Op, 3, DAG); 2136 case Intrinsic::loongarch_lsx_vsat_w: 2137 case Intrinsic::loongarch_lsx_vsat_wu: 2138 case Intrinsic::loongarch_lsx_vrotri_w: 2139 case Intrinsic::loongarch_lsx_vsllwil_d_w: 2140 case Intrinsic::loongarch_lsx_vsllwil_du_wu: 2141 case Intrinsic::loongarch_lsx_vsrlri_w: 2142 case Intrinsic::loongarch_lsx_vsrari_w: 2143 case Intrinsic::loongarch_lsx_vslei_bu: 2144 case Intrinsic::loongarch_lsx_vslei_hu: 2145 case Intrinsic::loongarch_lsx_vslei_wu: 2146 case Intrinsic::loongarch_lsx_vslei_du: 2147 case Intrinsic::loongarch_lsx_vslti_bu: 2148 case Intrinsic::loongarch_lsx_vslti_hu: 2149 case Intrinsic::loongarch_lsx_vslti_wu: 2150 case Intrinsic::loongarch_lsx_vslti_du: 2151 case Intrinsic::loongarch_lsx_vbsll_v: 2152 case Intrinsic::loongarch_lsx_vbsrl_v: 2153 case Intrinsic::loongarch_lasx_xvsat_w: 2154 case Intrinsic::loongarch_lasx_xvsat_wu: 2155 case Intrinsic::loongarch_lasx_xvrotri_w: 2156 case Intrinsic::loongarch_lasx_xvsllwil_d_w: 2157 case Intrinsic::loongarch_lasx_xvsllwil_du_wu: 2158 case Intrinsic::loongarch_lasx_xvsrlri_w: 2159 case Intrinsic::loongarch_lasx_xvsrari_w: 2160 case Intrinsic::loongarch_lasx_xvslei_bu: 2161 case Intrinsic::loongarch_lasx_xvslei_hu: 2162 case Intrinsic::loongarch_lasx_xvslei_wu: 2163 case Intrinsic::loongarch_lasx_xvslei_du: 2164 case Intrinsic::loongarch_lasx_xvslti_bu: 2165 case Intrinsic::loongarch_lasx_xvslti_hu: 2166 case Intrinsic::loongarch_lasx_xvslti_wu: 2167 case Intrinsic::loongarch_lasx_xvslti_du: 2168 case Intrinsic::loongarch_lasx_xvbsll_v: 2169 case Intrinsic::loongarch_lasx_xvbsrl_v: 2170 return checkIntrinsicImmArg<5>(Op, 2, DAG); 2171 case Intrinsic::loongarch_lsx_vseqi_b: 2172 case Intrinsic::loongarch_lsx_vseqi_h: 2173 case Intrinsic::loongarch_lsx_vseqi_w: 2174 case Intrinsic::loongarch_lsx_vseqi_d: 2175 case Intrinsic::loongarch_lsx_vslei_b: 2176 case Intrinsic::loongarch_lsx_vslei_h: 2177 case Intrinsic::loongarch_lsx_vslei_w: 2178 case Intrinsic::loongarch_lsx_vslei_d: 2179 case Intrinsic::loongarch_lsx_vslti_b: 2180 case Intrinsic::loongarch_lsx_vslti_h: 2181 case Intrinsic::loongarch_lsx_vslti_w: 2182 case Intrinsic::loongarch_lsx_vslti_d: 2183 case Intrinsic::loongarch_lasx_xvseqi_b: 2184 case Intrinsic::loongarch_lasx_xvseqi_h: 2185 case Intrinsic::loongarch_lasx_xvseqi_w: 2186 case Intrinsic::loongarch_lasx_xvseqi_d: 2187 case Intrinsic::loongarch_lasx_xvslei_b: 2188 case Intrinsic::loongarch_lasx_xvslei_h: 2189 case Intrinsic::loongarch_lasx_xvslei_w: 2190 case Intrinsic::loongarch_lasx_xvslei_d: 2191 case Intrinsic::loongarch_lasx_xvslti_b: 2192 case Intrinsic::loongarch_lasx_xvslti_h: 2193 case Intrinsic::loongarch_lasx_xvslti_w: 2194 case Intrinsic::loongarch_lasx_xvslti_d: 2195 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); 2196 case Intrinsic::loongarch_lsx_vsrlni_h_w: 2197 case Intrinsic::loongarch_lsx_vsrani_h_w: 2198 case Intrinsic::loongarch_lsx_vsrlrni_h_w: 2199 case Intrinsic::loongarch_lsx_vsrarni_h_w: 2200 case Intrinsic::loongarch_lsx_vssrlni_h_w: 2201 case Intrinsic::loongarch_lsx_vssrani_h_w: 2202 case Intrinsic::loongarch_lsx_vssrlni_hu_w: 2203 case Intrinsic::loongarch_lsx_vssrani_hu_w: 2204 case Intrinsic::loongarch_lsx_vssrlrni_h_w: 2205 
case Intrinsic::loongarch_lsx_vssrarni_h_w: 2206 case Intrinsic::loongarch_lsx_vssrlrni_hu_w: 2207 case Intrinsic::loongarch_lsx_vssrarni_hu_w: 2208 case Intrinsic::loongarch_lsx_vfrstpi_b: 2209 case Intrinsic::loongarch_lsx_vfrstpi_h: 2210 case Intrinsic::loongarch_lasx_xvsrlni_h_w: 2211 case Intrinsic::loongarch_lasx_xvsrani_h_w: 2212 case Intrinsic::loongarch_lasx_xvsrlrni_h_w: 2213 case Intrinsic::loongarch_lasx_xvsrarni_h_w: 2214 case Intrinsic::loongarch_lasx_xvssrlni_h_w: 2215 case Intrinsic::loongarch_lasx_xvssrani_h_w: 2216 case Intrinsic::loongarch_lasx_xvssrlni_hu_w: 2217 case Intrinsic::loongarch_lasx_xvssrani_hu_w: 2218 case Intrinsic::loongarch_lasx_xvssrlrni_h_w: 2219 case Intrinsic::loongarch_lasx_xvssrarni_h_w: 2220 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: 2221 case Intrinsic::loongarch_lasx_xvssrarni_hu_w: 2222 case Intrinsic::loongarch_lasx_xvfrstpi_b: 2223 case Intrinsic::loongarch_lasx_xvfrstpi_h: 2224 return checkIntrinsicImmArg<5>(Op, 3, DAG); 2225 case Intrinsic::loongarch_lsx_vsat_d: 2226 case Intrinsic::loongarch_lsx_vsat_du: 2227 case Intrinsic::loongarch_lsx_vrotri_d: 2228 case Intrinsic::loongarch_lsx_vsrlri_d: 2229 case Intrinsic::loongarch_lsx_vsrari_d: 2230 case Intrinsic::loongarch_lasx_xvsat_d: 2231 case Intrinsic::loongarch_lasx_xvsat_du: 2232 case Intrinsic::loongarch_lasx_xvrotri_d: 2233 case Intrinsic::loongarch_lasx_xvsrlri_d: 2234 case Intrinsic::loongarch_lasx_xvsrari_d: 2235 return checkIntrinsicImmArg<6>(Op, 2, DAG); 2236 case Intrinsic::loongarch_lsx_vsrlni_w_d: 2237 case Intrinsic::loongarch_lsx_vsrani_w_d: 2238 case Intrinsic::loongarch_lsx_vsrlrni_w_d: 2239 case Intrinsic::loongarch_lsx_vsrarni_w_d: 2240 case Intrinsic::loongarch_lsx_vssrlni_w_d: 2241 case Intrinsic::loongarch_lsx_vssrani_w_d: 2242 case Intrinsic::loongarch_lsx_vssrlni_wu_d: 2243 case Intrinsic::loongarch_lsx_vssrani_wu_d: 2244 case Intrinsic::loongarch_lsx_vssrlrni_w_d: 2245 case Intrinsic::loongarch_lsx_vssrarni_w_d: 2246 case Intrinsic::loongarch_lsx_vssrlrni_wu_d: 2247 case Intrinsic::loongarch_lsx_vssrarni_wu_d: 2248 case Intrinsic::loongarch_lasx_xvsrlni_w_d: 2249 case Intrinsic::loongarch_lasx_xvsrani_w_d: 2250 case Intrinsic::loongarch_lasx_xvsrlrni_w_d: 2251 case Intrinsic::loongarch_lasx_xvsrarni_w_d: 2252 case Intrinsic::loongarch_lasx_xvssrlni_w_d: 2253 case Intrinsic::loongarch_lasx_xvssrani_w_d: 2254 case Intrinsic::loongarch_lasx_xvssrlni_wu_d: 2255 case Intrinsic::loongarch_lasx_xvssrani_wu_d: 2256 case Intrinsic::loongarch_lasx_xvssrlrni_w_d: 2257 case Intrinsic::loongarch_lasx_xvssrarni_w_d: 2258 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: 2259 case Intrinsic::loongarch_lasx_xvssrarni_wu_d: 2260 return checkIntrinsicImmArg<6>(Op, 3, DAG); 2261 case Intrinsic::loongarch_lsx_vsrlni_d_q: 2262 case Intrinsic::loongarch_lsx_vsrani_d_q: 2263 case Intrinsic::loongarch_lsx_vsrlrni_d_q: 2264 case Intrinsic::loongarch_lsx_vsrarni_d_q: 2265 case Intrinsic::loongarch_lsx_vssrlni_d_q: 2266 case Intrinsic::loongarch_lsx_vssrani_d_q: 2267 case Intrinsic::loongarch_lsx_vssrlni_du_q: 2268 case Intrinsic::loongarch_lsx_vssrani_du_q: 2269 case Intrinsic::loongarch_lsx_vssrlrni_d_q: 2270 case Intrinsic::loongarch_lsx_vssrarni_d_q: 2271 case Intrinsic::loongarch_lsx_vssrlrni_du_q: 2272 case Intrinsic::loongarch_lsx_vssrarni_du_q: 2273 case Intrinsic::loongarch_lasx_xvsrlni_d_q: 2274 case Intrinsic::loongarch_lasx_xvsrani_d_q: 2275 case Intrinsic::loongarch_lasx_xvsrlrni_d_q: 2276 case Intrinsic::loongarch_lasx_xvsrarni_d_q: 2277 case 
Intrinsic::loongarch_lasx_xvssrlni_d_q: 2278 case Intrinsic::loongarch_lasx_xvssrani_d_q: 2279 case Intrinsic::loongarch_lasx_xvssrlni_du_q: 2280 case Intrinsic::loongarch_lasx_xvssrani_du_q: 2281 case Intrinsic::loongarch_lasx_xvssrlrni_d_q: 2282 case Intrinsic::loongarch_lasx_xvssrarni_d_q: 2283 case Intrinsic::loongarch_lasx_xvssrlrni_du_q: 2284 case Intrinsic::loongarch_lasx_xvssrarni_du_q: 2285 return checkIntrinsicImmArg<7>(Op, 3, DAG); 2286 case Intrinsic::loongarch_lsx_vnori_b: 2287 case Intrinsic::loongarch_lsx_vshuf4i_b: 2288 case Intrinsic::loongarch_lsx_vshuf4i_h: 2289 case Intrinsic::loongarch_lsx_vshuf4i_w: 2290 case Intrinsic::loongarch_lasx_xvnori_b: 2291 case Intrinsic::loongarch_lasx_xvshuf4i_b: 2292 case Intrinsic::loongarch_lasx_xvshuf4i_h: 2293 case Intrinsic::loongarch_lasx_xvshuf4i_w: 2294 case Intrinsic::loongarch_lasx_xvpermi_d: 2295 return checkIntrinsicImmArg<8>(Op, 2, DAG); 2296 case Intrinsic::loongarch_lsx_vshuf4i_d: 2297 case Intrinsic::loongarch_lsx_vpermi_w: 2298 case Intrinsic::loongarch_lsx_vbitseli_b: 2299 case Intrinsic::loongarch_lsx_vextrins_b: 2300 case Intrinsic::loongarch_lsx_vextrins_h: 2301 case Intrinsic::loongarch_lsx_vextrins_w: 2302 case Intrinsic::loongarch_lsx_vextrins_d: 2303 case Intrinsic::loongarch_lasx_xvshuf4i_d: 2304 case Intrinsic::loongarch_lasx_xvpermi_w: 2305 case Intrinsic::loongarch_lasx_xvpermi_q: 2306 case Intrinsic::loongarch_lasx_xvbitseli_b: 2307 case Intrinsic::loongarch_lasx_xvextrins_b: 2308 case Intrinsic::loongarch_lasx_xvextrins_h: 2309 case Intrinsic::loongarch_lasx_xvextrins_w: 2310 case Intrinsic::loongarch_lasx_xvextrins_d: 2311 return checkIntrinsicImmArg<8>(Op, 3, DAG); 2312 case Intrinsic::loongarch_lsx_vrepli_b: 2313 case Intrinsic::loongarch_lsx_vrepli_h: 2314 case Intrinsic::loongarch_lsx_vrepli_w: 2315 case Intrinsic::loongarch_lsx_vrepli_d: 2316 case Intrinsic::loongarch_lasx_xvrepli_b: 2317 case Intrinsic::loongarch_lasx_xvrepli_h: 2318 case Intrinsic::loongarch_lasx_xvrepli_w: 2319 case Intrinsic::loongarch_lasx_xvrepli_d: 2320 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); 2321 case Intrinsic::loongarch_lsx_vldi: 2322 case Intrinsic::loongarch_lasx_xvldi: 2323 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); 2324 } 2325 } 2326 2327 // Helper function that emits error message for intrinsics with chain and return 2328 // merge values of a UNDEF and the chain. 
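// Returning {UNDEF, incoming chain} keeps both results of the intrinsic node
// well-formed so that legalization can continue after the diagnostic.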
2329 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, 2330 StringRef ErrorMsg, 2331 SelectionDAG &DAG) { 2332 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + "."); 2333 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, 2334 SDLoc(Op)); 2335 } 2336 2337 SDValue 2338 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 2339 SelectionDAG &DAG) const { 2340 SDLoc DL(Op); 2341 MVT GRLenVT = Subtarget.getGRLenVT(); 2342 EVT VT = Op.getValueType(); 2343 SDValue Chain = Op.getOperand(0); 2344 const StringRef ErrorMsgOOR = "argument out of range"; 2345 const StringRef ErrorMsgReqLA64 = "requires loongarch64"; 2346 const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; 2347 2348 switch (Op.getConstantOperandVal(1)) { 2349 default: 2350 return Op; 2351 case Intrinsic::loongarch_crc_w_b_w: 2352 case Intrinsic::loongarch_crc_w_h_w: 2353 case Intrinsic::loongarch_crc_w_w_w: 2354 case Intrinsic::loongarch_crc_w_d_w: 2355 case Intrinsic::loongarch_crcc_w_b_w: 2356 case Intrinsic::loongarch_crcc_w_h_w: 2357 case Intrinsic::loongarch_crcc_w_w_w: 2358 case Intrinsic::loongarch_crcc_w_d_w: 2359 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG); 2360 case Intrinsic::loongarch_csrrd_w: 2361 case Intrinsic::loongarch_csrrd_d: { 2362 unsigned Imm = Op.getConstantOperandVal(2); 2363 return !isUInt<14>(Imm) 2364 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 2365 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other}, 2366 {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); 2367 } 2368 case Intrinsic::loongarch_csrwr_w: 2369 case Intrinsic::loongarch_csrwr_d: { 2370 unsigned Imm = Op.getConstantOperandVal(3); 2371 return !isUInt<14>(Imm) 2372 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 2373 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other}, 2374 {Chain, Op.getOperand(2), 2375 DAG.getConstant(Imm, DL, GRLenVT)}); 2376 } 2377 case Intrinsic::loongarch_csrxchg_w: 2378 case Intrinsic::loongarch_csrxchg_d: { 2379 unsigned Imm = Op.getConstantOperandVal(4); 2380 return !isUInt<14>(Imm) 2381 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 2382 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other}, 2383 {Chain, Op.getOperand(2), Op.getOperand(3), 2384 DAG.getConstant(Imm, DL, GRLenVT)}); 2385 } 2386 case Intrinsic::loongarch_iocsrrd_d: { 2387 return DAG.getNode( 2388 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other}, 2389 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))}); 2390 } 2391 #define IOCSRRD_CASE(NAME, NODE) \ 2392 case Intrinsic::loongarch_##NAME: { \ 2393 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \ 2394 {Chain, Op.getOperand(2)}); \ 2395 } 2396 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); 2397 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); 2398 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); 2399 #undef IOCSRRD_CASE 2400 case Intrinsic::loongarch_cpucfg: { 2401 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other}, 2402 {Chain, Op.getOperand(2)}); 2403 } 2404 case Intrinsic::loongarch_lddir_d: { 2405 unsigned Imm = Op.getConstantOperandVal(3); 2406 return !isUInt<8>(Imm) 2407 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 2408 : Op; 2409 } 2410 case Intrinsic::loongarch_movfcsr2gr: { 2411 if (!Subtarget.hasBasicF()) 2412 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG); 2413 unsigned Imm = Op.getConstantOperandVal(2); 2414 return !isUInt<2>(Imm) 2415 ? 
emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 2416 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other}, 2417 {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); 2418 } 2419 case Intrinsic::loongarch_lsx_vld: 2420 case Intrinsic::loongarch_lsx_vldrepl_b: 2421 case Intrinsic::loongarch_lasx_xvld: 2422 case Intrinsic::loongarch_lasx_xvldrepl_b: 2423 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue()) 2424 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) 2425 : SDValue(); 2426 case Intrinsic::loongarch_lsx_vldrepl_h: 2427 case Intrinsic::loongarch_lasx_xvldrepl_h: 2428 return !isShiftedInt<11, 1>( 2429 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue()) 2430 ? emitIntrinsicWithChainErrorMessage( 2431 Op, "argument out of range or not a multiple of 2", DAG) 2432 : SDValue(); 2433 case Intrinsic::loongarch_lsx_vldrepl_w: 2434 case Intrinsic::loongarch_lasx_xvldrepl_w: 2435 return !isShiftedInt<10, 2>( 2436 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue()) 2437 ? emitIntrinsicWithChainErrorMessage( 2438 Op, "argument out of range or not a multiple of 4", DAG) 2439 : SDValue(); 2440 case Intrinsic::loongarch_lsx_vldrepl_d: 2441 case Intrinsic::loongarch_lasx_xvldrepl_d: 2442 return !isShiftedInt<9, 3>( 2443 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue()) 2444 ? emitIntrinsicWithChainErrorMessage( 2445 Op, "argument out of range or not a multiple of 8", DAG) 2446 : SDValue(); 2447 } 2448 } 2449 2450 // Helper function that emits error message for intrinsics with void return 2451 // value and return the chain. 2452 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, 2453 SelectionDAG &DAG) { 2454 2455 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + "."); 2456 return Op.getOperand(0); 2457 } 2458 2459 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, 2460 SelectionDAG &DAG) const { 2461 SDLoc DL(Op); 2462 MVT GRLenVT = Subtarget.getGRLenVT(); 2463 SDValue Chain = Op.getOperand(0); 2464 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1); 2465 SDValue Op2 = Op.getOperand(2); 2466 const StringRef ErrorMsgOOR = "argument out of range"; 2467 const StringRef ErrorMsgReqLA64 = "requires loongarch64"; 2468 const StringRef ErrorMsgReqLA32 = "requires loongarch32"; 2469 const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; 2470 2471 switch (IntrinsicEnum) { 2472 default: 2473 // TODO: Add more Intrinsics. 2474 return SDValue(); 2475 case Intrinsic::loongarch_cacop_d: 2476 case Intrinsic::loongarch_cacop_w: { 2477 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) 2478 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG); 2479 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) 2480 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG); 2481 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12) 2482 unsigned Imm1 = Op2->getAsZExtVal(); 2483 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue(); 2484 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2)) 2485 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); 2486 return Op; 2487 } 2488 case Intrinsic::loongarch_dbar: { 2489 unsigned Imm = Op2->getAsZExtVal(); 2490 return !isUInt<15>(Imm) 2491 ? 
emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2492 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain, 2493 DAG.getConstant(Imm, DL, GRLenVT)); 2494 } 2495 case Intrinsic::loongarch_ibar: { 2496 unsigned Imm = Op2->getAsZExtVal(); 2497 return !isUInt<15>(Imm) 2498 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2499 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain, 2500 DAG.getConstant(Imm, DL, GRLenVT)); 2501 } 2502 case Intrinsic::loongarch_break: { 2503 unsigned Imm = Op2->getAsZExtVal(); 2504 return !isUInt<15>(Imm) 2505 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2506 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain, 2507 DAG.getConstant(Imm, DL, GRLenVT)); 2508 } 2509 case Intrinsic::loongarch_movgr2fcsr: { 2510 if (!Subtarget.hasBasicF()) 2511 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG); 2512 unsigned Imm = Op2->getAsZExtVal(); 2513 return !isUInt<2>(Imm) 2514 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2515 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain, 2516 DAG.getConstant(Imm, DL, GRLenVT), 2517 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, 2518 Op.getOperand(3))); 2519 } 2520 case Intrinsic::loongarch_syscall: { 2521 unsigned Imm = Op2->getAsZExtVal(); 2522 return !isUInt<15>(Imm) 2523 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2524 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain, 2525 DAG.getConstant(Imm, DL, GRLenVT)); 2526 } 2527 #define IOCSRWR_CASE(NAME, NODE) \ 2528 case Intrinsic::loongarch_##NAME: { \ 2529 SDValue Op3 = Op.getOperand(3); \ 2530 return Subtarget.is64Bit() \ 2531 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \ 2532 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ 2533 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \ 2534 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \ 2535 Op3); \ 2536 } 2537 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B); 2538 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H); 2539 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W); 2540 #undef IOCSRWR_CASE 2541 case Intrinsic::loongarch_iocsrwr_d: { 2542 return !Subtarget.is64Bit() 2543 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) 2544 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain, 2545 Op2, 2546 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, 2547 Op.getOperand(3))); 2548 } 2549 #define ASRT_LE_GT_CASE(NAME) \ 2550 case Intrinsic::loongarch_##NAME: { \ 2551 return !Subtarget.is64Bit() \ 2552 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \ 2553 : Op; \ 2554 } 2555 ASRT_LE_GT_CASE(asrtle_d) 2556 ASRT_LE_GT_CASE(asrtgt_d) 2557 #undef ASRT_LE_GT_CASE 2558 case Intrinsic::loongarch_ldpte_d: { 2559 unsigned Imm = Op.getConstantOperandVal(3); 2560 return !Subtarget.is64Bit() 2561 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) 2562 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2563 : Op; 2564 } 2565 case Intrinsic::loongarch_lsx_vst: 2566 case Intrinsic::loongarch_lasx_xvst: 2567 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) 2568 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2569 : SDValue(); 2570 case Intrinsic::loongarch_lasx_xvstelm_b: 2571 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2572 !isUInt<5>(Op.getConstantOperandVal(5))) 2573 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2574 : SDValue(); 2575 case Intrinsic::loongarch_lsx_vstelm_b: 2576 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2577 !isUInt<4>(Op.getConstantOperandVal(5))) 2578 ? 
emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) 2579 : SDValue(); 2580 case Intrinsic::loongarch_lasx_xvstelm_h: 2581 return (!isShiftedInt<8, 1>( 2582 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2583 !isUInt<4>(Op.getConstantOperandVal(5))) 2584 ? emitIntrinsicErrorMessage( 2585 Op, "argument out of range or not a multiple of 2", DAG) 2586 : SDValue(); 2587 case Intrinsic::loongarch_lsx_vstelm_h: 2588 return (!isShiftedInt<8, 1>( 2589 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2590 !isUInt<3>(Op.getConstantOperandVal(5))) 2591 ? emitIntrinsicErrorMessage( 2592 Op, "argument out of range or not a multiple of 2", DAG) 2593 : SDValue(); 2594 case Intrinsic::loongarch_lasx_xvstelm_w: 2595 return (!isShiftedInt<8, 2>( 2596 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2597 !isUInt<3>(Op.getConstantOperandVal(5))) 2598 ? emitIntrinsicErrorMessage( 2599 Op, "argument out of range or not a multiple of 4", DAG) 2600 : SDValue(); 2601 case Intrinsic::loongarch_lsx_vstelm_w: 2602 return (!isShiftedInt<8, 2>( 2603 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2604 !isUInt<2>(Op.getConstantOperandVal(5))) 2605 ? emitIntrinsicErrorMessage( 2606 Op, "argument out of range or not a multiple of 4", DAG) 2607 : SDValue(); 2608 case Intrinsic::loongarch_lasx_xvstelm_d: 2609 return (!isShiftedInt<8, 3>( 2610 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2611 !isUInt<2>(Op.getConstantOperandVal(5))) 2612 ? emitIntrinsicErrorMessage( 2613 Op, "argument out of range or not a multiple of 8", DAG) 2614 : SDValue(); 2615 case Intrinsic::loongarch_lsx_vstelm_d: 2616 return (!isShiftedInt<8, 3>( 2617 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || 2618 !isUInt<1>(Op.getConstantOperandVal(5))) 2619 ? 
emitIntrinsicErrorMessage( 2620 Op, "argument out of range or not a multiple of 8", DAG) 2621 : SDValue(); 2622 } 2623 } 2624 2625 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, 2626 SelectionDAG &DAG) const { 2627 SDLoc DL(Op); 2628 SDValue Lo = Op.getOperand(0); 2629 SDValue Hi = Op.getOperand(1); 2630 SDValue Shamt = Op.getOperand(2); 2631 EVT VT = Lo.getValueType(); 2632 2633 // if Shamt-GRLen < 0: // Shamt < GRLen 2634 // Lo = Lo << Shamt 2635 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) 2636 // else: 2637 // Lo = 0 2638 // Hi = Lo << (Shamt-GRLen) 2639 2640 SDValue Zero = DAG.getConstant(0, DL, VT); 2641 SDValue One = DAG.getConstant(1, DL, VT); 2642 SDValue MinusGRLen = 2643 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT); 2644 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); 2645 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); 2646 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); 2647 2648 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 2649 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 2650 SDValue ShiftRightLo = 2651 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt); 2652 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 2653 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 2654 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen); 2655 2656 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); 2657 2658 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 2659 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 2660 2661 SDValue Parts[2] = {Lo, Hi}; 2662 return DAG.getMergeValues(Parts, DL); 2663 } 2664 2665 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, 2666 SelectionDAG &DAG, 2667 bool IsSRA) const { 2668 SDLoc DL(Op); 2669 SDValue Lo = Op.getOperand(0); 2670 SDValue Hi = Op.getOperand(1); 2671 SDValue Shamt = Op.getOperand(2); 2672 EVT VT = Lo.getValueType(); 2673 2674 // SRA expansion: 2675 // if Shamt-GRLen < 0: // Shamt < GRLen 2676 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) 2677 // Hi = Hi >>s Shamt 2678 // else: 2679 // Lo = Hi >>s (Shamt-GRLen); 2680 // Hi = Hi >>s (GRLen-1) 2681 // 2682 // SRL expansion: 2683 // if Shamt-GRLen < 0: // Shamt < GRLen 2684 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) 2685 // Hi = Hi >>u Shamt 2686 // else: 2687 // Lo = Hi >>u (Shamt-GRLen); 2688 // Hi = 0; 2689 2690 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 2691 2692 SDValue Zero = DAG.getConstant(0, DL, VT); 2693 SDValue One = DAG.getConstant(1, DL, VT); 2694 SDValue MinusGRLen = 2695 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT); 2696 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); 2697 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); 2698 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); 2699 2700 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 2701 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 2702 SDValue ShiftLeftHi = 2703 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt); 2704 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 2705 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 2706 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen); 2707 SDValue HiFalse = 2708 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero; 2709 2710 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); 2711 2712 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 2713 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 2714 2715 SDValue Parts[2] = {Lo, Hi}; 2716 return DAG.getMergeValues(Parts, DL); 2717 } 2718 2719 // Returns the opcode of the target-specific SDNode that implements the 32-bit 2720 // form of the given Opcode. 2721 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { 2722 switch (Opcode) { 2723 default: 2724 llvm_unreachable("Unexpected opcode"); 2725 case ISD::SDIV: 2726 return LoongArchISD::DIV_W; 2727 case ISD::UDIV: 2728 return LoongArchISD::DIV_WU; 2729 case ISD::SREM: 2730 return LoongArchISD::MOD_W; 2731 case ISD::UREM: 2732 return LoongArchISD::MOD_WU; 2733 case ISD::SHL: 2734 return LoongArchISD::SLL_W; 2735 case ISD::SRA: 2736 return LoongArchISD::SRA_W; 2737 case ISD::SRL: 2738 return LoongArchISD::SRL_W; 2739 case ISD::ROTL: 2740 case ISD::ROTR: 2741 return LoongArchISD::ROTR_W; 2742 case ISD::CTTZ: 2743 return LoongArchISD::CTZ_W; 2744 case ISD::CTLZ: 2745 return LoongArchISD::CLZ_W; 2746 } 2747 } 2748 2749 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG 2750 // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would 2751 // otherwise be promoted to i64, making it difficult to select the 2752 // SLL_W/.../*W later one because the fact the operation was originally of 2753 // type i8/i16/i32 is lost. 2754 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, 2755 unsigned ExtOpc = ISD::ANY_EXTEND) { 2756 SDLoc DL(N); 2757 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode()); 2758 SDValue NewOp0, NewRes; 2759 2760 switch (NumOp) { 2761 default: 2762 llvm_unreachable("Unexpected NumOp"); 2763 case 1: { 2764 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); 2765 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0); 2766 break; 2767 } 2768 case 2: { 2769 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); 2770 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); 2771 if (N->getOpcode() == ISD::ROTL) { 2772 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64); 2773 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1); 2774 } 2775 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); 2776 break; 2777 } 2778 // TODO:Handle more NumOp. 2779 } 2780 2781 // ReplaceNodeResults requires we maintain the same type for the return 2782 // value. 2783 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); 2784 } 2785 2786 // Converts the given 32-bit operation to a i64 operation with signed extension 2787 // semantic to reduce the signed extension instructions. 2788 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { 2789 SDLoc DL(N); 2790 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 2791 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 2792 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); 2793 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 2794 DAG.getValueType(MVT::i32)); 2795 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 2796 } 2797 2798 // Helper function that emits error message for intrinsics with/without chain 2799 // and return a UNDEF or and the chain as the results. 
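// When WithChain is false only the UNDEF result is pushed; otherwise the
// incoming chain is pushed as the second result as well.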
2800 static void emitErrorAndReplaceIntrinsicResults( 2801 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, 2802 StringRef ErrorMsg, bool WithChain = true) { 2803 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + "."); 2804 Results.push_back(DAG.getUNDEF(N->getValueType(0))); 2805 if (!WithChain) 2806 return; 2807 Results.push_back(N->getOperand(0)); 2808 } 2809 2810 template <unsigned N> 2811 static void 2812 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results, 2813 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, 2814 unsigned ResOp) { 2815 const StringRef ErrorMsgOOR = "argument out of range"; 2816 unsigned Imm = Node->getConstantOperandVal(2); 2817 if (!isUInt<N>(Imm)) { 2818 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR, 2819 /*WithChain=*/false); 2820 return; 2821 } 2822 SDLoc DL(Node); 2823 SDValue Vec = Node->getOperand(1); 2824 2825 SDValue PickElt = 2826 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec, 2827 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()), 2828 DAG.getValueType(Vec.getValueType().getVectorElementType())); 2829 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0), 2830 PickElt.getValue(0))); 2831 } 2832 2833 static void replaceVecCondBranchResults(SDNode *N, 2834 SmallVectorImpl<SDValue> &Results, 2835 SelectionDAG &DAG, 2836 const LoongArchSubtarget &Subtarget, 2837 unsigned ResOp) { 2838 SDLoc DL(N); 2839 SDValue Vec = N->getOperand(1); 2840 2841 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec); 2842 Results.push_back( 2843 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0))); 2844 } 2845 2846 static void 2847 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results, 2848 SelectionDAG &DAG, 2849 const LoongArchSubtarget &Subtarget) { 2850 switch (N->getConstantOperandVal(0)) { 2851 default: 2852 llvm_unreachable("Unexpected Intrinsic."); 2853 case Intrinsic::loongarch_lsx_vpickve2gr_b: 2854 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, 2855 LoongArchISD::VPICK_SEXT_ELT); 2856 break; 2857 case Intrinsic::loongarch_lsx_vpickve2gr_h: 2858 case Intrinsic::loongarch_lasx_xvpickve2gr_w: 2859 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, 2860 LoongArchISD::VPICK_SEXT_ELT); 2861 break; 2862 case Intrinsic::loongarch_lsx_vpickve2gr_w: 2863 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, 2864 LoongArchISD::VPICK_SEXT_ELT); 2865 break; 2866 case Intrinsic::loongarch_lsx_vpickve2gr_bu: 2867 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget, 2868 LoongArchISD::VPICK_ZEXT_ELT); 2869 break; 2870 case Intrinsic::loongarch_lsx_vpickve2gr_hu: 2871 case Intrinsic::loongarch_lasx_xvpickve2gr_wu: 2872 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget, 2873 LoongArchISD::VPICK_ZEXT_ELT); 2874 break; 2875 case Intrinsic::loongarch_lsx_vpickve2gr_wu: 2876 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget, 2877 LoongArchISD::VPICK_ZEXT_ELT); 2878 break; 2879 case Intrinsic::loongarch_lsx_bz_b: 2880 case Intrinsic::loongarch_lsx_bz_h: 2881 case Intrinsic::loongarch_lsx_bz_w: 2882 case Intrinsic::loongarch_lsx_bz_d: 2883 case Intrinsic::loongarch_lasx_xbz_b: 2884 case Intrinsic::loongarch_lasx_xbz_h: 2885 case Intrinsic::loongarch_lasx_xbz_w: 2886 case Intrinsic::loongarch_lasx_xbz_d: 2887 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 2888 LoongArchISD::VALL_ZERO); 2889 break; 2890 case Intrinsic::loongarch_lsx_bz_v: 2891 case Intrinsic::loongarch_lasx_xbz_v: 2892 
replaceVecCondBranchResults(N, Results, DAG, Subtarget, 2893 LoongArchISD::VANY_ZERO); 2894 break; 2895 case Intrinsic::loongarch_lsx_bnz_b: 2896 case Intrinsic::loongarch_lsx_bnz_h: 2897 case Intrinsic::loongarch_lsx_bnz_w: 2898 case Intrinsic::loongarch_lsx_bnz_d: 2899 case Intrinsic::loongarch_lasx_xbnz_b: 2900 case Intrinsic::loongarch_lasx_xbnz_h: 2901 case Intrinsic::loongarch_lasx_xbnz_w: 2902 case Intrinsic::loongarch_lasx_xbnz_d: 2903 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 2904 LoongArchISD::VALL_NONZERO); 2905 break; 2906 case Intrinsic::loongarch_lsx_bnz_v: 2907 case Intrinsic::loongarch_lasx_xbnz_v: 2908 replaceVecCondBranchResults(N, Results, DAG, Subtarget, 2909 LoongArchISD::VANY_NONZERO); 2910 break; 2911 } 2912 } 2913 2914 static void replaceCMP_XCHG_128Results(SDNode *N, 2915 SmallVectorImpl<SDValue> &Results, 2916 SelectionDAG &DAG) { 2917 assert(N->getValueType(0) == MVT::i128 && 2918 "AtomicCmpSwap on types less than 128 should be legal"); 2919 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 2920 2921 unsigned Opcode; 2922 switch (MemOp->getMergedOrdering()) { 2923 case AtomicOrdering::Acquire: 2924 case AtomicOrdering::AcquireRelease: 2925 case AtomicOrdering::SequentiallyConsistent: 2926 Opcode = LoongArch::PseudoCmpXchg128Acquire; 2927 break; 2928 case AtomicOrdering::Monotonic: 2929 case AtomicOrdering::Release: 2930 Opcode = LoongArch::PseudoCmpXchg128; 2931 break; 2932 default: 2933 llvm_unreachable("Unexpected ordering!"); 2934 } 2935 2936 SDLoc DL(N); 2937 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64); 2938 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64); 2939 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second, 2940 NewVal.first, NewVal.second, N->getOperand(0)}; 2941 2942 SDNode *CmpSwap = DAG.getMachineNode( 2943 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other), 2944 Ops); 2945 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 2946 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, 2947 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1))); 2948 Results.push_back(SDValue(CmpSwap, 3)); 2949 } 2950 2951 void LoongArchTargetLowering::ReplaceNodeResults( 2952 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 2953 SDLoc DL(N); 2954 EVT VT = N->getValueType(0); 2955 switch (N->getOpcode()) { 2956 default: 2957 llvm_unreachable("Don't know how to legalize this operation"); 2958 case ISD::ADD: 2959 case ISD::SUB: 2960 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 2961 "Unexpected custom legalisation"); 2962 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 2963 break; 2964 case ISD::SDIV: 2965 case ISD::UDIV: 2966 case ISD::SREM: 2967 case ISD::UREM: 2968 assert(VT == MVT::i32 && Subtarget.is64Bit() && 2969 "Unexpected custom legalisation"); 2970 Results.push_back(customLegalizeToWOp(N, DAG, 2, 2971 Subtarget.hasDiv32() && VT == MVT::i32 2972 ? 
ISD::ANY_EXTEND 2973 : ISD::SIGN_EXTEND)); 2974 break; 2975 case ISD::SHL: 2976 case ISD::SRA: 2977 case ISD::SRL: 2978 assert(VT == MVT::i32 && Subtarget.is64Bit() && 2979 "Unexpected custom legalisation"); 2980 if (N->getOperand(1).getOpcode() != ISD::Constant) { 2981 Results.push_back(customLegalizeToWOp(N, DAG, 2)); 2982 break; 2983 } 2984 break; 2985 case ISD::ROTL: 2986 case ISD::ROTR: 2987 assert(VT == MVT::i32 && Subtarget.is64Bit() && 2988 "Unexpected custom legalisation"); 2989 Results.push_back(customLegalizeToWOp(N, DAG, 2)); 2990 break; 2991 case ISD::FP_TO_SINT: { 2992 assert(VT == MVT::i32 && Subtarget.is64Bit() && 2993 "Unexpected custom legalisation"); 2994 SDValue Src = N->getOperand(0); 2995 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0)); 2996 if (getTypeAction(*DAG.getContext(), Src.getValueType()) != 2997 TargetLowering::TypeSoftenFloat) { 2998 if (Src.getValueType() == MVT::f16) 2999 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src); 3000 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src); 3001 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst)); 3002 return; 3003 } 3004 // If the FP type needs to be softened, emit a library call using the 'si' 3005 // version. If we left it to default legalization we'd end up with 'di'. 3006 RTLIB::Libcall LC; 3007 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT); 3008 MakeLibCallOptions CallOptions; 3009 EVT OpVT = Src.getValueType(); 3010 CallOptions.setTypeListBeforeSoften(OpVT, VT, true); 3011 SDValue Chain = SDValue(); 3012 SDValue Result; 3013 std::tie(Result, Chain) = 3014 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain); 3015 Results.push_back(Result); 3016 break; 3017 } 3018 case ISD::BITCAST: { 3019 SDValue Src = N->getOperand(0); 3020 EVT SrcVT = Src.getValueType(); 3021 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && 3022 Subtarget.hasBasicF()) { 3023 SDValue Dst = 3024 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src); 3025 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst)); 3026 } 3027 break; 3028 } 3029 case ISD::FP_TO_UINT: { 3030 assert(VT == MVT::i32 && Subtarget.is64Bit() && 3031 "Unexpected custom legalisation"); 3032 auto &TLI = DAG.getTargetLoweringInfo(); 3033 SDValue Tmp1, Tmp2; 3034 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG); 3035 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1)); 3036 break; 3037 } 3038 case ISD::BSWAP: { 3039 SDValue Src = N->getOperand(0); 3040 assert((VT == MVT::i16 || VT == MVT::i32) && 3041 "Unexpected custom legalization"); 3042 MVT GRLenVT = Subtarget.getGRLenVT(); 3043 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src); 3044 SDValue Tmp; 3045 switch (VT.getSizeInBits()) { 3046 default: 3047 llvm_unreachable("Unexpected operand width"); 3048 case 16: 3049 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc); 3050 break; 3051 case 32: 3052 // Only LA64 will get to here due to the size mismatch between VT and 3053 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo. 
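// For example, an i32 bswap of 0x11223344 is any-extended to the 64-bit
// GRLen type, REVB_2W swaps the bytes within each 32-bit word (the low word
// becomes 0x44332211), and the result is truncated back to i32.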
3054 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc); 3055 break; 3056 } 3057 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp)); 3058 break; 3059 } 3060 case ISD::BITREVERSE: { 3061 SDValue Src = N->getOperand(0); 3062 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) && 3063 "Unexpected custom legalization"); 3064 MVT GRLenVT = Subtarget.getGRLenVT(); 3065 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src); 3066 SDValue Tmp; 3067 switch (VT.getSizeInBits()) { 3068 default: 3069 llvm_unreachable("Unexpected operand width"); 3070 case 8: 3071 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc); 3072 break; 3073 case 32: 3074 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc); 3075 break; 3076 } 3077 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp)); 3078 break; 3079 } 3080 case ISD::CTLZ: 3081 case ISD::CTTZ: { 3082 assert(VT == MVT::i32 && Subtarget.is64Bit() && 3083 "Unexpected custom legalisation"); 3084 Results.push_back(customLegalizeToWOp(N, DAG, 1)); 3085 break; 3086 } 3087 case ISD::INTRINSIC_W_CHAIN: { 3088 SDValue Chain = N->getOperand(0); 3089 SDValue Op2 = N->getOperand(2); 3090 MVT GRLenVT = Subtarget.getGRLenVT(); 3091 const StringRef ErrorMsgOOR = "argument out of range"; 3092 const StringRef ErrorMsgReqLA64 = "requires loongarch64"; 3093 const StringRef ErrorMsgReqF = "requires basic 'f' target feature"; 3094 3095 switch (N->getConstantOperandVal(1)) { 3096 default: 3097 llvm_unreachable("Unexpected Intrinsic."); 3098 case Intrinsic::loongarch_movfcsr2gr: { 3099 if (!Subtarget.hasBasicF()) { 3100 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF); 3101 return; 3102 } 3103 unsigned Imm = Op2->getAsZExtVal(); 3104 if (!isUInt<2>(Imm)) { 3105 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); 3106 return; 3107 } 3108 SDValue MOVFCSR2GRResults = DAG.getNode( 3109 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other}, 3110 {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); 3111 Results.push_back( 3112 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0))); 3113 Results.push_back(MOVFCSR2GRResults.getValue(1)); 3114 break; 3115 } 3116 #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \ 3117 case Intrinsic::loongarch_##NAME: { \ 3118 SDValue NODE = DAG.getNode( \ 3119 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ 3120 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ 3121 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ 3122 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ 3123 Results.push_back(NODE.getValue(1)); \ 3124 break; \ 3125 } 3126 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W) 3127 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W) 3128 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W) 3129 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W) 3130 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W) 3131 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W) 3132 #undef CRC_CASE_EXT_BINARYOP 3133 3134 #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \ 3135 case Intrinsic::loongarch_##NAME: { \ 3136 SDValue NODE = DAG.getNode( \ 3137 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ 3138 {Chain, Op2, \ 3139 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ 3140 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ 3141 Results.push_back(NODE.getValue(1)); \ 3142 break; \ 3143 } 3144 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) 3145 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W) 3146 #undef CRC_CASE_EXT_UNARYOP 
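// The *_d CSR/IOCSR intrinsics handled by CSR_CASE are LA64-only: when they
// reach here on LA32 they are rejected with the "requires loongarch64"
// diagnostic and their results are replaced by undef.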
3147 #define CSR_CASE(ID) \ 3148 case Intrinsic::loongarch_##ID: { \ 3149 if (!Subtarget.is64Bit()) \ 3150 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ 3151 break; \ 3152 } 3153 CSR_CASE(csrrd_d); 3154 CSR_CASE(csrwr_d); 3155 CSR_CASE(csrxchg_d); 3156 CSR_CASE(iocsrrd_d); 3157 #undef CSR_CASE 3158 case Intrinsic::loongarch_csrrd_w: { 3159 unsigned Imm = Op2->getAsZExtVal(); 3160 if (!isUInt<14>(Imm)) { 3161 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); 3162 return; 3163 } 3164 SDValue CSRRDResults = 3165 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other}, 3166 {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); 3167 Results.push_back( 3168 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0))); 3169 Results.push_back(CSRRDResults.getValue(1)); 3170 break; 3171 } 3172 case Intrinsic::loongarch_csrwr_w: { 3173 unsigned Imm = N->getConstantOperandVal(3); 3174 if (!isUInt<14>(Imm)) { 3175 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); 3176 return; 3177 } 3178 SDValue CSRWRResults = 3179 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other}, 3180 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), 3181 DAG.getConstant(Imm, DL, GRLenVT)}); 3182 Results.push_back( 3183 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0))); 3184 Results.push_back(CSRWRResults.getValue(1)); 3185 break; 3186 } 3187 case Intrinsic::loongarch_csrxchg_w: { 3188 unsigned Imm = N->getConstantOperandVal(4); 3189 if (!isUInt<14>(Imm)) { 3190 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); 3191 return; 3192 } 3193 SDValue CSRXCHGResults = DAG.getNode( 3194 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other}, 3195 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), 3196 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)), 3197 DAG.getConstant(Imm, DL, GRLenVT)}); 3198 Results.push_back( 3199 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0))); 3200 Results.push_back(CSRXCHGResults.getValue(1)); 3201 break; 3202 } 3203 #define IOCSRRD_CASE(NAME, NODE) \ 3204 case Intrinsic::loongarch_##NAME: { \ 3205 SDValue IOCSRRDResults = \ 3206 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ 3207 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \ 3208 Results.push_back( \ 3209 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \ 3210 Results.push_back(IOCSRRDResults.getValue(1)); \ 3211 break; \ 3212 } 3213 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); 3214 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); 3215 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); 3216 #undef IOCSRRD_CASE 3217 case Intrinsic::loongarch_cpucfg: { 3218 SDValue CPUCFGResults = 3219 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other}, 3220 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); 3221 Results.push_back( 3222 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0))); 3223 Results.push_back(CPUCFGResults.getValue(1)); 3224 break; 3225 } 3226 case Intrinsic::loongarch_lddir_d: { 3227 if (!Subtarget.is64Bit()) { 3228 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); 3229 return; 3230 } 3231 break; 3232 } 3233 } 3234 break; 3235 } 3236 case ISD::READ_REGISTER: { 3237 if (Subtarget.is64Bit()) 3238 DAG.getContext()->emitError( 3239 "On LA64, only 64-bit registers can be read."); 3240 else 3241 DAG.getContext()->emitError( 3242 "On LA32, only 32-bit registers can be read."); 3243 Results.push_back(DAG.getUNDEF(VT)); 3244 Results.push_back(N->getOperand(0)); 3245 break; 3246 } 3247 case 
ISD::INTRINSIC_WO_CHAIN: {
3248     replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3249     break;
3250   }
3251   case ISD::LROUND: {
3252     SDValue Op0 = N->getOperand(0);
3253     EVT OpVT = Op0.getValueType();
3254     RTLIB::Libcall LC =
3255         OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
3256     MakeLibCallOptions CallOptions;
3257     CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
3258     SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
3259     Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
3260     Results.push_back(Result);
3261     break;
3262   }
3263   case ISD::ATOMIC_CMP_SWAP: {
3264     replaceCMP_XCHG_128Results(N, Results, DAG);
3265     break;
3266   }
3267   }
3268 }
3269
3270 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3271                                  TargetLowering::DAGCombinerInfo &DCI,
3272                                  const LoongArchSubtarget &Subtarget) {
3273   if (DCI.isBeforeLegalizeOps())
3274     return SDValue();
3275
3276   SDValue FirstOperand = N->getOperand(0);
3277   SDValue SecondOperand = N->getOperand(1);
3278   unsigned FirstOperandOpc = FirstOperand.getOpcode();
3279   EVT ValTy = N->getValueType(0);
3280   SDLoc DL(N);
3281   uint64_t lsb, msb;
3282   unsigned SMIdx, SMLen;
3283   ConstantSDNode *CN;
3284   SDValue NewOperand;
3285   MVT GRLenVT = Subtarget.getGRLenVT();
3286
3287   // Op's second operand must be a shifted mask.
3288   if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3289       !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3290     return SDValue();
3291
3292   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3293     // Pattern match BSTRPICK.
3294     // $dst = and ((sra or srl) $src, lsb), (2**len - 1)
3295     // => BSTRPICK $dst, $src, msb, lsb
3296     // where msb = lsb + len - 1
3297
3298     // The second operand of the shift must be an immediate.
3299     if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3300       return SDValue();
3301
3302     lsb = CN->getZExtValue();
3303
3304     // Return if the shifted mask does not start at bit 0 or the sum of its
3305     // length and lsb exceeds the word's size.
3306     if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3307       return SDValue();
3308
3309     NewOperand = FirstOperand.getOperand(0);
3310   } else {
3311     // Pattern match BSTRPICK.
3312     // $dst = and $src, (2**len - 1), if len > 12
3313     // => BSTRPICK $dst, $src, msb, lsb
3314     // where lsb = 0 and msb = len - 1
3315
3316     // If the mask is <= 0xfff, andi can be used instead.
3317     if (CN->getZExtValue() <= 0xfff)
3318       return SDValue();
3319
3320     // Return if the MSB of the mask exceeds the word's size.
3321     if (SMIdx + SMLen > ValTy.getSizeInBits())
3322       return SDValue();
3323
3324     if (SMIdx > 0) {
3325       // Omit if the constant has more than 2 uses. This is a conservative
3326       // decision. Whether it is a win depends on the HW microarchitecture.
3327       // However, it should always be better for 1 and 2 uses.
3328       if (CN->use_size() > 2)
3329         return SDValue();
3330       // Return if the constant can be composed by a single LU12I.W.
3331       if ((CN->getZExtValue() & 0xfff) == 0)
3332         return SDValue();
3333       // Return if the constant can be composed by a single ADDI with
3334       // the zero register.
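// (That is, a negative value in [-2048, 0), which a single ADDI from the
// zero register can materialize.)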
3335 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0) 3336 return SDValue(); 3337 } 3338 3339 lsb = SMIdx; 3340 NewOperand = FirstOperand; 3341 } 3342 3343 msb = lsb + SMLen - 1; 3344 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand, 3345 DAG.getConstant(msb, DL, GRLenVT), 3346 DAG.getConstant(lsb, DL, GRLenVT)); 3347 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0) 3348 return NR0; 3349 // Try to optimize to 3350 // bstrpick $Rd, $Rs, msb, lsb 3351 // slli $Rd, $Rd, lsb 3352 return DAG.getNode(ISD::SHL, DL, ValTy, NR0, 3353 DAG.getConstant(lsb, DL, GRLenVT)); 3354 } 3355 3356 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, 3357 TargetLowering::DAGCombinerInfo &DCI, 3358 const LoongArchSubtarget &Subtarget) { 3359 if (DCI.isBeforeLegalizeOps()) 3360 return SDValue(); 3361 3362 // $dst = srl (and $src, Mask), Shamt 3363 // => 3364 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt 3365 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1 3366 // 3367 3368 SDValue FirstOperand = N->getOperand(0); 3369 ConstantSDNode *CN; 3370 EVT ValTy = N->getValueType(0); 3371 SDLoc DL(N); 3372 MVT GRLenVT = Subtarget.getGRLenVT(); 3373 unsigned MaskIdx, MaskLen; 3374 uint64_t Shamt; 3375 3376 // The first operand must be an AND and the second operand of the AND must be 3377 // a shifted mask. 3378 if (FirstOperand.getOpcode() != ISD::AND || 3379 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) || 3380 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen)) 3381 return SDValue(); 3382 3383 // The second operand (shift amount) must be an immediate. 3384 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) 3385 return SDValue(); 3386 3387 Shamt = CN->getZExtValue(); 3388 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1) 3389 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, 3390 FirstOperand->getOperand(0), 3391 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 3392 DAG.getConstant(Shamt, DL, GRLenVT)); 3393 3394 return SDValue(); 3395 } 3396 3397 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 3398 TargetLowering::DAGCombinerInfo &DCI, 3399 const LoongArchSubtarget &Subtarget) { 3400 MVT GRLenVT = Subtarget.getGRLenVT(); 3401 EVT ValTy = N->getValueType(0); 3402 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3403 ConstantSDNode *CN0, *CN1; 3404 SDLoc DL(N); 3405 unsigned ValBits = ValTy.getSizeInBits(); 3406 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; 3407 unsigned Shamt; 3408 bool SwapAndRetried = false; 3409 3410 if (DCI.isBeforeLegalizeOps()) 3411 return SDValue(); 3412 3413 if (ValBits != 32 && ValBits != 64) 3414 return SDValue(); 3415 3416 Retry: 3417 // 1st pattern to match BSTRINS: 3418 // R = or (and X, mask0), (and (shl Y, lsb), mask1) 3419 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 3420 // => 3421 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) 3422 if (N0.getOpcode() == ISD::AND && 3423 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 3424 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 3425 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL && 3426 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 3427 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && 3428 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && 3429 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 3430 (Shamt = CN1->getZExtValue()) == MaskIdx0 && 3431 (MaskIdx0 + MaskLen0 <= ValBits)) { 3432 
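// An illustrative i32 instance of this pattern:
// R = or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00)
// => R = BSTRINS X, Y, 15, 8 (lsb = 8, size = 8)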
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n"); 3433 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 3434 N1.getOperand(0).getOperand(0), 3435 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), 3436 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 3437 } 3438 3439 // 2nd pattern to match BSTRINS: 3440 // R = or (and X, mask0), (shl (and Y, mask1), lsb) 3441 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb) 3442 // => 3443 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) 3444 if (N0.getOpcode() == ISD::AND && 3445 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 3446 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 3447 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && 3448 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 3449 (Shamt = CN1->getZExtValue()) == MaskIdx0 && 3450 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 3451 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && 3452 MaskLen0 == MaskLen1 && MaskIdx1 == 0 && 3453 (MaskIdx0 + MaskLen0 <= ValBits)) { 3454 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n"); 3455 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 3456 N1.getOperand(0).getOperand(0), 3457 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), 3458 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 3459 } 3460 3461 // 3rd pattern to match BSTRINS: 3462 // R = or (and X, mask0), (and Y, mask1) 3463 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 3464 // => 3465 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb 3466 // where msb = lsb + size - 1 3467 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && 3468 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 3469 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 3470 (MaskIdx0 + MaskLen0 <= 64) && 3471 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) && 3472 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { 3473 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n"); 3474 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 3475 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1, 3476 DAG.getConstant(MaskIdx0, DL, GRLenVT)), 3477 DAG.getConstant(ValBits == 32 3478 ? 
(MaskIdx0 + (MaskLen0 & 31) - 1) 3479 : (MaskIdx0 + MaskLen0 - 1), 3480 DL, GRLenVT), 3481 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 3482 } 3483 3484 // 4th pattern to match BSTRINS: 3485 // R = or (and X, mask), (shl Y, shamt) 3486 // where mask = (2**shamt - 1) 3487 // => 3488 // R = BSTRINS X, Y, ValBits - 1, shamt 3489 // where ValBits = 32 or 64 3490 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL && 3491 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 3492 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) && 3493 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 3494 (Shamt = CN1->getZExtValue()) == MaskLen0 && 3495 (MaskIdx0 + MaskLen0 <= ValBits)) { 3496 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n"); 3497 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 3498 N1.getOperand(0), 3499 DAG.getConstant((ValBits - 1), DL, GRLenVT), 3500 DAG.getConstant(Shamt, DL, GRLenVT)); 3501 } 3502 3503 // 5th pattern to match BSTRINS: 3504 // R = or (and X, mask), const 3505 // where ~mask = (2**size - 1) << lsb, mask & const = 0 3506 // => 3507 // R = BSTRINS X, (const >> lsb), msb, lsb 3508 // where msb = lsb + size - 1 3509 if (N0.getOpcode() == ISD::AND && 3510 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && 3511 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && 3512 (CN1 = dyn_cast<ConstantSDNode>(N1)) && 3513 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { 3514 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n"); 3515 return DAG.getNode( 3516 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), 3517 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), 3518 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) 3519 : (MaskIdx0 + MaskLen0 - 1), 3520 DL, GRLenVT), 3521 DAG.getConstant(MaskIdx0, DL, GRLenVT)); 3522 } 3523 3524 // 6th pattern. 3525 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten 3526 // by the incoming bits are known to be zero. 3527 // => 3528 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt 3529 // 3530 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th 3531 // pattern is more common than the 1st. So we put the 1st before the 6th in 3532 // order to match as many nodes as possible. 3533 ConstantSDNode *CNMask, *CNShamt; 3534 unsigned MaskIdx, MaskLen; 3535 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && 3536 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 3537 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && 3538 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 3539 CNShamt->getZExtValue() + MaskLen <= ValBits) { 3540 Shamt = CNShamt->getZExtValue(); 3541 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); 3542 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 3543 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n"); 3544 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 3545 N1.getOperand(0).getOperand(0), 3546 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT), 3547 DAG.getConstant(Shamt, DL, GRLenVT)); 3548 } 3549 } 3550 3551 // 7th pattern. 3552 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be 3553 // overwritten by the incoming bits are known to be zero. 3554 // => 3555 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx 3556 // 3557 // Similarly, the 7th pattern is more common than the 2nd. 
So we put the 2nd 3558 // before the 7th in order to match as many nodes as possible. 3559 if (N1.getOpcode() == ISD::AND && 3560 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 3561 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && 3562 N1.getOperand(0).getOpcode() == ISD::SHL && 3563 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && 3564 CNShamt->getZExtValue() == MaskIdx) { 3565 APInt ShMask(ValBits, CNMask->getZExtValue()); 3566 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 3567 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n"); 3568 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 3569 N1.getOperand(0).getOperand(0), 3570 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 3571 DAG.getConstant(MaskIdx, DL, GRLenVT)); 3572 } 3573 } 3574 3575 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. 3576 if (!SwapAndRetried) { 3577 std::swap(N0, N1); 3578 SwapAndRetried = true; 3579 goto Retry; 3580 } 3581 3582 SwapAndRetried = false; 3583 Retry2: 3584 // 8th pattern. 3585 // a = b | (c & shifted_mask), where all positions in b to be overwritten by 3586 // the incoming bits are known to be zero. 3587 // => 3588 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx 3589 // 3590 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So 3591 // we put it here in order to match as many nodes as possible or generate less 3592 // instructions. 3593 if (N1.getOpcode() == ISD::AND && 3594 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && 3595 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) { 3596 APInt ShMask(ValBits, CNMask->getZExtValue()); 3597 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { 3598 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n"); 3599 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, 3600 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), 3601 N1->getOperand(0), 3602 DAG.getConstant(MaskIdx, DL, GRLenVT)), 3603 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), 3604 DAG.getConstant(MaskIdx, DL, GRLenVT)); 3605 } 3606 } 3607 // Swap N0/N1 and retry. 3608 if (!SwapAndRetried) { 3609 std::swap(N0, N1); 3610 SwapAndRetried = true; 3611 goto Retry2; 3612 } 3613 3614 return SDValue(); 3615 } 3616 3617 static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) { 3618 ExtType = ISD::NON_EXTLOAD; 3619 3620 switch (V.getNode()->getOpcode()) { 3621 case ISD::LOAD: { 3622 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode()); 3623 if ((LoadNode->getMemoryVT() == MVT::i8) || 3624 (LoadNode->getMemoryVT() == MVT::i16)) { 3625 ExtType = LoadNode->getExtensionType(); 3626 return true; 3627 } 3628 return false; 3629 } 3630 case ISD::AssertSext: { 3631 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1)); 3632 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { 3633 ExtType = ISD::SEXTLOAD; 3634 return true; 3635 } 3636 return false; 3637 } 3638 case ISD::AssertZext: { 3639 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1)); 3640 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { 3641 ExtType = ISD::ZEXTLOAD; 3642 return true; 3643 } 3644 return false; 3645 } 3646 default: 3647 return false; 3648 } 3649 3650 return false; 3651 } 3652 3653 // Eliminate redundant truncation and zero-extension nodes. 
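// In both cases, an AND of two values that were truncated and then
// zero-extended feeds a comparison; once checkValueWidth shows the inputs are
// already narrow (i8/i16 loads or Assert{S,Z}ext), the AND and the compare
// are rebuilt directly on the original, untruncated values.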
3654 // * Case 1: 3655 // +------------+ +------------+ +------------+ 3656 // | Input1 | | Input2 | | CC | 3657 // +------------+ +------------+ +------------+ 3658 // | | | 3659 // V V +----+ 3660 // +------------+ +------------+ | 3661 // | TRUNCATE | | TRUNCATE | | 3662 // +------------+ +------------+ | 3663 // | | | 3664 // V V | 3665 // +------------+ +------------+ | 3666 // | ZERO_EXT | | ZERO_EXT | | 3667 // +------------+ +------------+ | 3668 // | | | 3669 // | +-------------+ | 3670 // V V | | 3671 // +----------------+ | | 3672 // | AND | | | 3673 // +----------------+ | | 3674 // | | | 3675 // +---------------+ | | 3676 // | | | 3677 // V V V 3678 // +-------------+ 3679 // | CMP | 3680 // +-------------+ 3681 // * Case 2: 3682 // +------------+ +------------+ +-------------+ +------------+ +------------+ 3683 // | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC | 3684 // +------------+ +------------+ +-------------+ +------------+ +------------+ 3685 // | | | | | 3686 // V | | | | 3687 // +------------+ | | | | 3688 // | XOR |<---------------------+ | | 3689 // +------------+ | | | 3690 // | | | | 3691 // V V +---------------+ | 3692 // +------------+ +------------+ | | 3693 // | TRUNCATE | | TRUNCATE | | +-------------------------+ 3694 // +------------+ +------------+ | | 3695 // | | | | 3696 // V V | | 3697 // +------------+ +------------+ | | 3698 // | ZERO_EXT | | ZERO_EXT | | | 3699 // +------------+ +------------+ | | 3700 // | | | | 3701 // V V | | 3702 // +----------------+ | | 3703 // | AND | | | 3704 // +----------------+ | | 3705 // | | | 3706 // +---------------+ | | 3707 // | | | 3708 // V V V 3709 // +-------------+ 3710 // | CMP | 3711 // +-------------+ 3712 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, 3713 TargetLowering::DAGCombinerInfo &DCI, 3714 const LoongArchSubtarget &Subtarget) { 3715 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 3716 3717 SDNode *AndNode = N->getOperand(0).getNode(); 3718 if (AndNode->getOpcode() != ISD::AND) 3719 return SDValue(); 3720 3721 SDValue AndInputValue2 = AndNode->getOperand(1); 3722 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND) 3723 return SDValue(); 3724 3725 SDValue CmpInputValue = N->getOperand(1); 3726 SDValue AndInputValue1 = AndNode->getOperand(0); 3727 if (AndInputValue1.getOpcode() == ISD::XOR) { 3728 if (CC != ISD::SETEQ && CC != ISD::SETNE) 3729 return SDValue(); 3730 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1)); 3731 if (!CN || CN->getSExtValue() != -1) 3732 return SDValue(); 3733 CN = dyn_cast<ConstantSDNode>(CmpInputValue); 3734 if (!CN || CN->getSExtValue() != 0) 3735 return SDValue(); 3736 AndInputValue1 = AndInputValue1.getOperand(0); 3737 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND) 3738 return SDValue(); 3739 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) { 3740 if (AndInputValue2 != CmpInputValue) 3741 return SDValue(); 3742 } else { 3743 return SDValue(); 3744 } 3745 3746 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0); 3747 if (TruncValue1.getOpcode() != ISD::TRUNCATE) 3748 return SDValue(); 3749 3750 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0); 3751 if (TruncValue2.getOpcode() != ISD::TRUNCATE) 3752 return SDValue(); 3753 3754 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0); 3755 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0); 3756 ISD::LoadExtType ExtType1; 3757 ISD::LoadExtType ExtType2; 3758 3759 if 
(!checkValueWidth(TruncInputValue1, ExtType1) || 3760 !checkValueWidth(TruncInputValue2, ExtType2)) 3761 return SDValue(); 3762 3763 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) || 3764 AndNode->getValueType(0) != TruncInputValue1->getValueType(0)) 3765 return SDValue(); 3766 3767 if ((ExtType2 != ISD::ZEXTLOAD) && 3768 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD))) 3769 return SDValue(); 3770 3771 // These truncation and zero-extension nodes are not necessary, remove them. 3772 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0), 3773 TruncInputValue1, TruncInputValue2); 3774 SDValue NewSetCC = 3775 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC); 3776 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode()); 3777 return SDValue(N, 0); 3778 } 3779 3780 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. 3781 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, 3782 TargetLowering::DAGCombinerInfo &DCI, 3783 const LoongArchSubtarget &Subtarget) { 3784 if (DCI.isBeforeLegalizeOps()) 3785 return SDValue(); 3786 3787 SDValue Src = N->getOperand(0); 3788 if (Src.getOpcode() != LoongArchISD::REVB_2W) 3789 return SDValue(); 3790 3791 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0), 3792 Src.getOperand(0)); 3793 } 3794 3795 template <unsigned N> 3796 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, 3797 SelectionDAG &DAG, 3798 const LoongArchSubtarget &Subtarget, 3799 bool IsSigned = false) { 3800 SDLoc DL(Node); 3801 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp)); 3802 // Check the ImmArg. 3803 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || 3804 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { 3805 DAG.getContext()->emitError(Node->getOperationName(0) + 3806 ": argument out of range."); 3807 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT()); 3808 } 3809 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT()); 3810 } 3811 3812 template <unsigned N> 3813 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, 3814 SelectionDAG &DAG, bool IsSigned = false) { 3815 SDLoc DL(Node); 3816 EVT ResTy = Node->getValueType(0); 3817 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp)); 3818 3819 // Check the ImmArg. 3820 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || 3821 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { 3822 DAG.getContext()->emitError(Node->getOperationName(0) + 3823 ": argument out of range."); 3824 return DAG.getNode(ISD::UNDEF, DL, ResTy); 3825 } 3826 return DAG.getConstant( 3827 APInt(ResTy.getScalarType().getSizeInBits(), 3828 IsSigned ? 
CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), 3829 DL, ResTy); 3830 } 3831 3832 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) { 3833 SDLoc DL(Node); 3834 EVT ResTy = Node->getValueType(0); 3835 SDValue Vec = Node->getOperand(2); 3836 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy); 3837 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask); 3838 } 3839 3840 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) { 3841 SDLoc DL(Node); 3842 EVT ResTy = Node->getValueType(0); 3843 SDValue One = DAG.getConstant(1, DL, ResTy); 3844 SDValue Bit = 3845 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG)); 3846 3847 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), 3848 DAG.getNOT(DL, Bit, ResTy)); 3849 } 3850 3851 template <unsigned N> 3852 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) { 3853 SDLoc DL(Node); 3854 EVT ResTy = Node->getValueType(0); 3855 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2)); 3856 // Check the unsigned ImmArg. 3857 if (!isUInt<N>(CImm->getZExtValue())) { 3858 DAG.getContext()->emitError(Node->getOperationName(0) + 3859 ": argument out of range."); 3860 return DAG.getNode(ISD::UNDEF, DL, ResTy); 3861 } 3862 3863 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); 3864 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy); 3865 3866 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask); 3867 } 3868 3869 template <unsigned N> 3870 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) { 3871 SDLoc DL(Node); 3872 EVT ResTy = Node->getValueType(0); 3873 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2)); 3874 // Check the unsigned ImmArg. 3875 if (!isUInt<N>(CImm->getZExtValue())) { 3876 DAG.getContext()->emitError(Node->getOperationName(0) + 3877 ": argument out of range."); 3878 return DAG.getNode(ISD::UNDEF, DL, ResTy); 3879 } 3880 3881 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); 3882 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); 3883 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm); 3884 } 3885 3886 template <unsigned N> 3887 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) { 3888 SDLoc DL(Node); 3889 EVT ResTy = Node->getValueType(0); 3890 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2)); 3891 // Check the unsigned ImmArg. 
3892 if (!isUInt<N>(CImm->getZExtValue())) { 3893 DAG.getContext()->emitError(Node->getOperationName(0) + 3894 ": argument out of range."); 3895 return DAG.getNode(ISD::UNDEF, DL, ResTy); 3896 } 3897 3898 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); 3899 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); 3900 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm); 3901 } 3902 3903 static SDValue 3904 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, 3905 TargetLowering::DAGCombinerInfo &DCI, 3906 const LoongArchSubtarget &Subtarget) { 3907 SDLoc DL(N); 3908 switch (N->getConstantOperandVal(0)) { 3909 default: 3910 break; 3911 case Intrinsic::loongarch_lsx_vadd_b: 3912 case Intrinsic::loongarch_lsx_vadd_h: 3913 case Intrinsic::loongarch_lsx_vadd_w: 3914 case Intrinsic::loongarch_lsx_vadd_d: 3915 case Intrinsic::loongarch_lasx_xvadd_b: 3916 case Intrinsic::loongarch_lasx_xvadd_h: 3917 case Intrinsic::loongarch_lasx_xvadd_w: 3918 case Intrinsic::loongarch_lasx_xvadd_d: 3919 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), 3920 N->getOperand(2)); 3921 case Intrinsic::loongarch_lsx_vaddi_bu: 3922 case Intrinsic::loongarch_lsx_vaddi_hu: 3923 case Intrinsic::loongarch_lsx_vaddi_wu: 3924 case Intrinsic::loongarch_lsx_vaddi_du: 3925 case Intrinsic::loongarch_lasx_xvaddi_bu: 3926 case Intrinsic::loongarch_lasx_xvaddi_hu: 3927 case Intrinsic::loongarch_lasx_xvaddi_wu: 3928 case Intrinsic::loongarch_lasx_xvaddi_du: 3929 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), 3930 lowerVectorSplatImm<5>(N, 2, DAG)); 3931 case Intrinsic::loongarch_lsx_vsub_b: 3932 case Intrinsic::loongarch_lsx_vsub_h: 3933 case Intrinsic::loongarch_lsx_vsub_w: 3934 case Intrinsic::loongarch_lsx_vsub_d: 3935 case Intrinsic::loongarch_lasx_xvsub_b: 3936 case Intrinsic::loongarch_lasx_xvsub_h: 3937 case Intrinsic::loongarch_lasx_xvsub_w: 3938 case Intrinsic::loongarch_lasx_xvsub_d: 3939 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), 3940 N->getOperand(2)); 3941 case Intrinsic::loongarch_lsx_vsubi_bu: 3942 case Intrinsic::loongarch_lsx_vsubi_hu: 3943 case Intrinsic::loongarch_lsx_vsubi_wu: 3944 case Intrinsic::loongarch_lsx_vsubi_du: 3945 case Intrinsic::loongarch_lasx_xvsubi_bu: 3946 case Intrinsic::loongarch_lasx_xvsubi_hu: 3947 case Intrinsic::loongarch_lasx_xvsubi_wu: 3948 case Intrinsic::loongarch_lasx_xvsubi_du: 3949 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), 3950 lowerVectorSplatImm<5>(N, 2, DAG)); 3951 case Intrinsic::loongarch_lsx_vneg_b: 3952 case Intrinsic::loongarch_lsx_vneg_h: 3953 case Intrinsic::loongarch_lsx_vneg_w: 3954 case Intrinsic::loongarch_lsx_vneg_d: 3955 case Intrinsic::loongarch_lasx_xvneg_b: 3956 case Intrinsic::loongarch_lasx_xvneg_h: 3957 case Intrinsic::loongarch_lasx_xvneg_w: 3958 case Intrinsic::loongarch_lasx_xvneg_d: 3959 return DAG.getNode( 3960 ISD::SUB, DL, N->getValueType(0), 3961 DAG.getConstant( 3962 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0, 3963 /*isSigned=*/true), 3964 SDLoc(N), N->getValueType(0)), 3965 N->getOperand(1)); 3966 case Intrinsic::loongarch_lsx_vmax_b: 3967 case Intrinsic::loongarch_lsx_vmax_h: 3968 case Intrinsic::loongarch_lsx_vmax_w: 3969 case Intrinsic::loongarch_lsx_vmax_d: 3970 case Intrinsic::loongarch_lasx_xvmax_b: 3971 case Intrinsic::loongarch_lasx_xvmax_h: 3972 case Intrinsic::loongarch_lasx_xvmax_w: 3973 case Intrinsic::loongarch_lasx_xvmax_d: 3974 return DAG.getNode(ISD::SMAX, DL, 
N->getValueType(0), N->getOperand(1), 3975 N->getOperand(2)); 3976 case Intrinsic::loongarch_lsx_vmax_bu: 3977 case Intrinsic::loongarch_lsx_vmax_hu: 3978 case Intrinsic::loongarch_lsx_vmax_wu: 3979 case Intrinsic::loongarch_lsx_vmax_du: 3980 case Intrinsic::loongarch_lasx_xvmax_bu: 3981 case Intrinsic::loongarch_lasx_xvmax_hu: 3982 case Intrinsic::loongarch_lasx_xvmax_wu: 3983 case Intrinsic::loongarch_lasx_xvmax_du: 3984 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), 3985 N->getOperand(2)); 3986 case Intrinsic::loongarch_lsx_vmaxi_b: 3987 case Intrinsic::loongarch_lsx_vmaxi_h: 3988 case Intrinsic::loongarch_lsx_vmaxi_w: 3989 case Intrinsic::loongarch_lsx_vmaxi_d: 3990 case Intrinsic::loongarch_lasx_xvmaxi_b: 3991 case Intrinsic::loongarch_lasx_xvmaxi_h: 3992 case Intrinsic::loongarch_lasx_xvmaxi_w: 3993 case Intrinsic::loongarch_lasx_xvmaxi_d: 3994 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), 3995 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); 3996 case Intrinsic::loongarch_lsx_vmaxi_bu: 3997 case Intrinsic::loongarch_lsx_vmaxi_hu: 3998 case Intrinsic::loongarch_lsx_vmaxi_wu: 3999 case Intrinsic::loongarch_lsx_vmaxi_du: 4000 case Intrinsic::loongarch_lasx_xvmaxi_bu: 4001 case Intrinsic::loongarch_lasx_xvmaxi_hu: 4002 case Intrinsic::loongarch_lasx_xvmaxi_wu: 4003 case Intrinsic::loongarch_lasx_xvmaxi_du: 4004 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), 4005 lowerVectorSplatImm<5>(N, 2, DAG)); 4006 case Intrinsic::loongarch_lsx_vmin_b: 4007 case Intrinsic::loongarch_lsx_vmin_h: 4008 case Intrinsic::loongarch_lsx_vmin_w: 4009 case Intrinsic::loongarch_lsx_vmin_d: 4010 case Intrinsic::loongarch_lasx_xvmin_b: 4011 case Intrinsic::loongarch_lasx_xvmin_h: 4012 case Intrinsic::loongarch_lasx_xvmin_w: 4013 case Intrinsic::loongarch_lasx_xvmin_d: 4014 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), 4015 N->getOperand(2)); 4016 case Intrinsic::loongarch_lsx_vmin_bu: 4017 case Intrinsic::loongarch_lsx_vmin_hu: 4018 case Intrinsic::loongarch_lsx_vmin_wu: 4019 case Intrinsic::loongarch_lsx_vmin_du: 4020 case Intrinsic::loongarch_lasx_xvmin_bu: 4021 case Intrinsic::loongarch_lasx_xvmin_hu: 4022 case Intrinsic::loongarch_lasx_xvmin_wu: 4023 case Intrinsic::loongarch_lasx_xvmin_du: 4024 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), 4025 N->getOperand(2)); 4026 case Intrinsic::loongarch_lsx_vmini_b: 4027 case Intrinsic::loongarch_lsx_vmini_h: 4028 case Intrinsic::loongarch_lsx_vmini_w: 4029 case Intrinsic::loongarch_lsx_vmini_d: 4030 case Intrinsic::loongarch_lasx_xvmini_b: 4031 case Intrinsic::loongarch_lasx_xvmini_h: 4032 case Intrinsic::loongarch_lasx_xvmini_w: 4033 case Intrinsic::loongarch_lasx_xvmini_d: 4034 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), 4035 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); 4036 case Intrinsic::loongarch_lsx_vmini_bu: 4037 case Intrinsic::loongarch_lsx_vmini_hu: 4038 case Intrinsic::loongarch_lsx_vmini_wu: 4039 case Intrinsic::loongarch_lsx_vmini_du: 4040 case Intrinsic::loongarch_lasx_xvmini_bu: 4041 case Intrinsic::loongarch_lasx_xvmini_hu: 4042 case Intrinsic::loongarch_lasx_xvmini_wu: 4043 case Intrinsic::loongarch_lasx_xvmini_du: 4044 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), 4045 lowerVectorSplatImm<5>(N, 2, DAG)); 4046 case Intrinsic::loongarch_lsx_vmul_b: 4047 case Intrinsic::loongarch_lsx_vmul_h: 4048 case Intrinsic::loongarch_lsx_vmul_w: 4049 case 
Intrinsic::loongarch_lsx_vmul_d: 4050 case Intrinsic::loongarch_lasx_xvmul_b: 4051 case Intrinsic::loongarch_lasx_xvmul_h: 4052 case Intrinsic::loongarch_lasx_xvmul_w: 4053 case Intrinsic::loongarch_lasx_xvmul_d: 4054 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1), 4055 N->getOperand(2)); 4056 case Intrinsic::loongarch_lsx_vmadd_b: 4057 case Intrinsic::loongarch_lsx_vmadd_h: 4058 case Intrinsic::loongarch_lsx_vmadd_w: 4059 case Intrinsic::loongarch_lsx_vmadd_d: 4060 case Intrinsic::loongarch_lasx_xvmadd_b: 4061 case Intrinsic::loongarch_lasx_xvmadd_h: 4062 case Intrinsic::loongarch_lasx_xvmadd_w: 4063 case Intrinsic::loongarch_lasx_xvmadd_d: { 4064 EVT ResTy = N->getValueType(0); 4065 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), 4066 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), 4067 N->getOperand(3))); 4068 } 4069 case Intrinsic::loongarch_lsx_vmsub_b: 4070 case Intrinsic::loongarch_lsx_vmsub_h: 4071 case Intrinsic::loongarch_lsx_vmsub_w: 4072 case Intrinsic::loongarch_lsx_vmsub_d: 4073 case Intrinsic::loongarch_lasx_xvmsub_b: 4074 case Intrinsic::loongarch_lasx_xvmsub_h: 4075 case Intrinsic::loongarch_lasx_xvmsub_w: 4076 case Intrinsic::loongarch_lasx_xvmsub_d: { 4077 EVT ResTy = N->getValueType(0); 4078 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), 4079 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), 4080 N->getOperand(3))); 4081 } 4082 case Intrinsic::loongarch_lsx_vdiv_b: 4083 case Intrinsic::loongarch_lsx_vdiv_h: 4084 case Intrinsic::loongarch_lsx_vdiv_w: 4085 case Intrinsic::loongarch_lsx_vdiv_d: 4086 case Intrinsic::loongarch_lasx_xvdiv_b: 4087 case Intrinsic::loongarch_lasx_xvdiv_h: 4088 case Intrinsic::loongarch_lasx_xvdiv_w: 4089 case Intrinsic::loongarch_lasx_xvdiv_d: 4090 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), 4091 N->getOperand(2)); 4092 case Intrinsic::loongarch_lsx_vdiv_bu: 4093 case Intrinsic::loongarch_lsx_vdiv_hu: 4094 case Intrinsic::loongarch_lsx_vdiv_wu: 4095 case Intrinsic::loongarch_lsx_vdiv_du: 4096 case Intrinsic::loongarch_lasx_xvdiv_bu: 4097 case Intrinsic::loongarch_lasx_xvdiv_hu: 4098 case Intrinsic::loongarch_lasx_xvdiv_wu: 4099 case Intrinsic::loongarch_lasx_xvdiv_du: 4100 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), 4101 N->getOperand(2)); 4102 case Intrinsic::loongarch_lsx_vmod_b: 4103 case Intrinsic::loongarch_lsx_vmod_h: 4104 case Intrinsic::loongarch_lsx_vmod_w: 4105 case Intrinsic::loongarch_lsx_vmod_d: 4106 case Intrinsic::loongarch_lasx_xvmod_b: 4107 case Intrinsic::loongarch_lasx_xvmod_h: 4108 case Intrinsic::loongarch_lasx_xvmod_w: 4109 case Intrinsic::loongarch_lasx_xvmod_d: 4110 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), 4111 N->getOperand(2)); 4112 case Intrinsic::loongarch_lsx_vmod_bu: 4113 case Intrinsic::loongarch_lsx_vmod_hu: 4114 case Intrinsic::loongarch_lsx_vmod_wu: 4115 case Intrinsic::loongarch_lsx_vmod_du: 4116 case Intrinsic::loongarch_lasx_xvmod_bu: 4117 case Intrinsic::loongarch_lasx_xvmod_hu: 4118 case Intrinsic::loongarch_lasx_xvmod_wu: 4119 case Intrinsic::loongarch_lasx_xvmod_du: 4120 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), 4121 N->getOperand(2)); 4122 case Intrinsic::loongarch_lsx_vand_v: 4123 case Intrinsic::loongarch_lasx_xvand_v: 4124 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), 4125 N->getOperand(2)); 4126 case Intrinsic::loongarch_lsx_vor_v: 4127 case Intrinsic::loongarch_lasx_xvor_v: 4128 return 
DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), 4129 N->getOperand(2)); 4130 case Intrinsic::loongarch_lsx_vxor_v: 4131 case Intrinsic::loongarch_lasx_xvxor_v: 4132 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), 4133 N->getOperand(2)); 4134 case Intrinsic::loongarch_lsx_vnor_v: 4135 case Intrinsic::loongarch_lasx_xvnor_v: { 4136 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), 4137 N->getOperand(2)); 4138 return DAG.getNOT(DL, Res, Res->getValueType(0)); 4139 } 4140 case Intrinsic::loongarch_lsx_vandi_b: 4141 case Intrinsic::loongarch_lasx_xvandi_b: 4142 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), 4143 lowerVectorSplatImm<8>(N, 2, DAG)); 4144 case Intrinsic::loongarch_lsx_vori_b: 4145 case Intrinsic::loongarch_lasx_xvori_b: 4146 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), 4147 lowerVectorSplatImm<8>(N, 2, DAG)); 4148 case Intrinsic::loongarch_lsx_vxori_b: 4149 case Intrinsic::loongarch_lasx_xvxori_b: 4150 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), 4151 lowerVectorSplatImm<8>(N, 2, DAG)); 4152 case Intrinsic::loongarch_lsx_vsll_b: 4153 case Intrinsic::loongarch_lsx_vsll_h: 4154 case Intrinsic::loongarch_lsx_vsll_w: 4155 case Intrinsic::loongarch_lsx_vsll_d: 4156 case Intrinsic::loongarch_lasx_xvsll_b: 4157 case Intrinsic::loongarch_lasx_xvsll_h: 4158 case Intrinsic::loongarch_lasx_xvsll_w: 4159 case Intrinsic::loongarch_lasx_xvsll_d: 4160 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 4161 truncateVecElts(N, DAG)); 4162 case Intrinsic::loongarch_lsx_vslli_b: 4163 case Intrinsic::loongarch_lasx_xvslli_b: 4164 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 4165 lowerVectorSplatImm<3>(N, 2, DAG)); 4166 case Intrinsic::loongarch_lsx_vslli_h: 4167 case Intrinsic::loongarch_lasx_xvslli_h: 4168 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 4169 lowerVectorSplatImm<4>(N, 2, DAG)); 4170 case Intrinsic::loongarch_lsx_vslli_w: 4171 case Intrinsic::loongarch_lasx_xvslli_w: 4172 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 4173 lowerVectorSplatImm<5>(N, 2, DAG)); 4174 case Intrinsic::loongarch_lsx_vslli_d: 4175 case Intrinsic::loongarch_lasx_xvslli_d: 4176 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), 4177 lowerVectorSplatImm<6>(N, 2, DAG)); 4178 case Intrinsic::loongarch_lsx_vsrl_b: 4179 case Intrinsic::loongarch_lsx_vsrl_h: 4180 case Intrinsic::loongarch_lsx_vsrl_w: 4181 case Intrinsic::loongarch_lsx_vsrl_d: 4182 case Intrinsic::loongarch_lasx_xvsrl_b: 4183 case Intrinsic::loongarch_lasx_xvsrl_h: 4184 case Intrinsic::loongarch_lasx_xvsrl_w: 4185 case Intrinsic::loongarch_lasx_xvsrl_d: 4186 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 4187 truncateVecElts(N, DAG)); 4188 case Intrinsic::loongarch_lsx_vsrli_b: 4189 case Intrinsic::loongarch_lasx_xvsrli_b: 4190 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 4191 lowerVectorSplatImm<3>(N, 2, DAG)); 4192 case Intrinsic::loongarch_lsx_vsrli_h: 4193 case Intrinsic::loongarch_lasx_xvsrli_h: 4194 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 4195 lowerVectorSplatImm<4>(N, 2, DAG)); 4196 case Intrinsic::loongarch_lsx_vsrli_w: 4197 case Intrinsic::loongarch_lasx_xvsrli_w: 4198 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 4199 lowerVectorSplatImm<5>(N, 2, DAG)); 4200 case 
Intrinsic::loongarch_lsx_vsrli_d: 4201 case Intrinsic::loongarch_lasx_xvsrli_d: 4202 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), 4203 lowerVectorSplatImm<6>(N, 2, DAG)); 4204 case Intrinsic::loongarch_lsx_vsra_b: 4205 case Intrinsic::loongarch_lsx_vsra_h: 4206 case Intrinsic::loongarch_lsx_vsra_w: 4207 case Intrinsic::loongarch_lsx_vsra_d: 4208 case Intrinsic::loongarch_lasx_xvsra_b: 4209 case Intrinsic::loongarch_lasx_xvsra_h: 4210 case Intrinsic::loongarch_lasx_xvsra_w: 4211 case Intrinsic::loongarch_lasx_xvsra_d: 4212 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 4213 truncateVecElts(N, DAG)); 4214 case Intrinsic::loongarch_lsx_vsrai_b: 4215 case Intrinsic::loongarch_lasx_xvsrai_b: 4216 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 4217 lowerVectorSplatImm<3>(N, 2, DAG)); 4218 case Intrinsic::loongarch_lsx_vsrai_h: 4219 case Intrinsic::loongarch_lasx_xvsrai_h: 4220 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 4221 lowerVectorSplatImm<4>(N, 2, DAG)); 4222 case Intrinsic::loongarch_lsx_vsrai_w: 4223 case Intrinsic::loongarch_lasx_xvsrai_w: 4224 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 4225 lowerVectorSplatImm<5>(N, 2, DAG)); 4226 case Intrinsic::loongarch_lsx_vsrai_d: 4227 case Intrinsic::loongarch_lasx_xvsrai_d: 4228 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), 4229 lowerVectorSplatImm<6>(N, 2, DAG)); 4230 case Intrinsic::loongarch_lsx_vclz_b: 4231 case Intrinsic::loongarch_lsx_vclz_h: 4232 case Intrinsic::loongarch_lsx_vclz_w: 4233 case Intrinsic::loongarch_lsx_vclz_d: 4234 case Intrinsic::loongarch_lasx_xvclz_b: 4235 case Intrinsic::loongarch_lasx_xvclz_h: 4236 case Intrinsic::loongarch_lasx_xvclz_w: 4237 case Intrinsic::loongarch_lasx_xvclz_d: 4238 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1)); 4239 case Intrinsic::loongarch_lsx_vpcnt_b: 4240 case Intrinsic::loongarch_lsx_vpcnt_h: 4241 case Intrinsic::loongarch_lsx_vpcnt_w: 4242 case Intrinsic::loongarch_lsx_vpcnt_d: 4243 case Intrinsic::loongarch_lasx_xvpcnt_b: 4244 case Intrinsic::loongarch_lasx_xvpcnt_h: 4245 case Intrinsic::loongarch_lasx_xvpcnt_w: 4246 case Intrinsic::loongarch_lasx_xvpcnt_d: 4247 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); 4248 case Intrinsic::loongarch_lsx_vbitclr_b: 4249 case Intrinsic::loongarch_lsx_vbitclr_h: 4250 case Intrinsic::loongarch_lsx_vbitclr_w: 4251 case Intrinsic::loongarch_lsx_vbitclr_d: 4252 case Intrinsic::loongarch_lasx_xvbitclr_b: 4253 case Intrinsic::loongarch_lasx_xvbitclr_h: 4254 case Intrinsic::loongarch_lasx_xvbitclr_w: 4255 case Intrinsic::loongarch_lasx_xvbitclr_d: 4256 return lowerVectorBitClear(N, DAG); 4257 case Intrinsic::loongarch_lsx_vbitclri_b: 4258 case Intrinsic::loongarch_lasx_xvbitclri_b: 4259 return lowerVectorBitClearImm<3>(N, DAG); 4260 case Intrinsic::loongarch_lsx_vbitclri_h: 4261 case Intrinsic::loongarch_lasx_xvbitclri_h: 4262 return lowerVectorBitClearImm<4>(N, DAG); 4263 case Intrinsic::loongarch_lsx_vbitclri_w: 4264 case Intrinsic::loongarch_lasx_xvbitclri_w: 4265 return lowerVectorBitClearImm<5>(N, DAG); 4266 case Intrinsic::loongarch_lsx_vbitclri_d: 4267 case Intrinsic::loongarch_lasx_xvbitclri_d: 4268 return lowerVectorBitClearImm<6>(N, DAG); 4269 case Intrinsic::loongarch_lsx_vbitset_b: 4270 case Intrinsic::loongarch_lsx_vbitset_h: 4271 case Intrinsic::loongarch_lsx_vbitset_w: 4272 case Intrinsic::loongarch_lsx_vbitset_d: 4273 case 
Intrinsic::loongarch_lasx_xvbitset_b: 4274 case Intrinsic::loongarch_lasx_xvbitset_h: 4275 case Intrinsic::loongarch_lasx_xvbitset_w: 4276 case Intrinsic::loongarch_lasx_xvbitset_d: { 4277 EVT VecTy = N->getValueType(0); 4278 SDValue One = DAG.getConstant(1, DL, VecTy); 4279 return DAG.getNode( 4280 ISD::OR, DL, VecTy, N->getOperand(1), 4281 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); 4282 } 4283 case Intrinsic::loongarch_lsx_vbitseti_b: 4284 case Intrinsic::loongarch_lasx_xvbitseti_b: 4285 return lowerVectorBitSetImm<3>(N, DAG); 4286 case Intrinsic::loongarch_lsx_vbitseti_h: 4287 case Intrinsic::loongarch_lasx_xvbitseti_h: 4288 return lowerVectorBitSetImm<4>(N, DAG); 4289 case Intrinsic::loongarch_lsx_vbitseti_w: 4290 case Intrinsic::loongarch_lasx_xvbitseti_w: 4291 return lowerVectorBitSetImm<5>(N, DAG); 4292 case Intrinsic::loongarch_lsx_vbitseti_d: 4293 case Intrinsic::loongarch_lasx_xvbitseti_d: 4294 return lowerVectorBitSetImm<6>(N, DAG); 4295 case Intrinsic::loongarch_lsx_vbitrev_b: 4296 case Intrinsic::loongarch_lsx_vbitrev_h: 4297 case Intrinsic::loongarch_lsx_vbitrev_w: 4298 case Intrinsic::loongarch_lsx_vbitrev_d: 4299 case Intrinsic::loongarch_lasx_xvbitrev_b: 4300 case Intrinsic::loongarch_lasx_xvbitrev_h: 4301 case Intrinsic::loongarch_lasx_xvbitrev_w: 4302 case Intrinsic::loongarch_lasx_xvbitrev_d: { 4303 EVT VecTy = N->getValueType(0); 4304 SDValue One = DAG.getConstant(1, DL, VecTy); 4305 return DAG.getNode( 4306 ISD::XOR, DL, VecTy, N->getOperand(1), 4307 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); 4308 } 4309 case Intrinsic::loongarch_lsx_vbitrevi_b: 4310 case Intrinsic::loongarch_lasx_xvbitrevi_b: 4311 return lowerVectorBitRevImm<3>(N, DAG); 4312 case Intrinsic::loongarch_lsx_vbitrevi_h: 4313 case Intrinsic::loongarch_lasx_xvbitrevi_h: 4314 return lowerVectorBitRevImm<4>(N, DAG); 4315 case Intrinsic::loongarch_lsx_vbitrevi_w: 4316 case Intrinsic::loongarch_lasx_xvbitrevi_w: 4317 return lowerVectorBitRevImm<5>(N, DAG); 4318 case Intrinsic::loongarch_lsx_vbitrevi_d: 4319 case Intrinsic::loongarch_lasx_xvbitrevi_d: 4320 return lowerVectorBitRevImm<6>(N, DAG); 4321 case Intrinsic::loongarch_lsx_vfadd_s: 4322 case Intrinsic::loongarch_lsx_vfadd_d: 4323 case Intrinsic::loongarch_lasx_xvfadd_s: 4324 case Intrinsic::loongarch_lasx_xvfadd_d: 4325 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), 4326 N->getOperand(2)); 4327 case Intrinsic::loongarch_lsx_vfsub_s: 4328 case Intrinsic::loongarch_lsx_vfsub_d: 4329 case Intrinsic::loongarch_lasx_xvfsub_s: 4330 case Intrinsic::loongarch_lasx_xvfsub_d: 4331 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), 4332 N->getOperand(2)); 4333 case Intrinsic::loongarch_lsx_vfmul_s: 4334 case Intrinsic::loongarch_lsx_vfmul_d: 4335 case Intrinsic::loongarch_lasx_xvfmul_s: 4336 case Intrinsic::loongarch_lasx_xvfmul_d: 4337 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), 4338 N->getOperand(2)); 4339 case Intrinsic::loongarch_lsx_vfdiv_s: 4340 case Intrinsic::loongarch_lsx_vfdiv_d: 4341 case Intrinsic::loongarch_lasx_xvfdiv_s: 4342 case Intrinsic::loongarch_lasx_xvfdiv_d: 4343 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), 4344 N->getOperand(2)); 4345 case Intrinsic::loongarch_lsx_vfmadd_s: 4346 case Intrinsic::loongarch_lsx_vfmadd_d: 4347 case Intrinsic::loongarch_lasx_xvfmadd_s: 4348 case Intrinsic::loongarch_lasx_xvfmadd_d: 4349 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), 4350 
N->getOperand(2), N->getOperand(3)); 4351 case Intrinsic::loongarch_lsx_vinsgr2vr_b: 4352 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), 4353 N->getOperand(1), N->getOperand(2), 4354 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); 4355 case Intrinsic::loongarch_lsx_vinsgr2vr_h: 4356 case Intrinsic::loongarch_lasx_xvinsgr2vr_w: 4357 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), 4358 N->getOperand(1), N->getOperand(2), 4359 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); 4360 case Intrinsic::loongarch_lsx_vinsgr2vr_w: 4361 case Intrinsic::loongarch_lasx_xvinsgr2vr_d: 4362 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), 4363 N->getOperand(1), N->getOperand(2), 4364 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); 4365 case Intrinsic::loongarch_lsx_vinsgr2vr_d: 4366 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), 4367 N->getOperand(1), N->getOperand(2), 4368 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget)); 4369 case Intrinsic::loongarch_lsx_vreplgr2vr_b: 4370 case Intrinsic::loongarch_lsx_vreplgr2vr_h: 4371 case Intrinsic::loongarch_lsx_vreplgr2vr_w: 4372 case Intrinsic::loongarch_lsx_vreplgr2vr_d: 4373 case Intrinsic::loongarch_lasx_xvreplgr2vr_b: 4374 case Intrinsic::loongarch_lasx_xvreplgr2vr_h: 4375 case Intrinsic::loongarch_lasx_xvreplgr2vr_w: 4376 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: 4377 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0), 4378 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), 4379 N->getOperand(1))); 4380 case Intrinsic::loongarch_lsx_vreplve_b: 4381 case Intrinsic::loongarch_lsx_vreplve_h: 4382 case Intrinsic::loongarch_lsx_vreplve_w: 4383 case Intrinsic::loongarch_lsx_vreplve_d: 4384 case Intrinsic::loongarch_lasx_xvreplve_b: 4385 case Intrinsic::loongarch_lasx_xvreplve_h: 4386 case Intrinsic::loongarch_lasx_xvreplve_w: 4387 case Intrinsic::loongarch_lasx_xvreplve_d: 4388 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), 4389 N->getOperand(1), 4390 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), 4391 N->getOperand(2))); 4392 } 4393 return SDValue(); 4394 } 4395 4396 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, 4397 DAGCombinerInfo &DCI) const { 4398 SelectionDAG &DAG = DCI.DAG; 4399 switch (N->getOpcode()) { 4400 default: 4401 break; 4402 case ISD::AND: 4403 return performANDCombine(N, DAG, DCI, Subtarget); 4404 case ISD::OR: 4405 return performORCombine(N, DAG, DCI, Subtarget); 4406 case ISD::SETCC: 4407 return performSETCCCombine(N, DAG, DCI, Subtarget); 4408 case ISD::SRL: 4409 return performSRLCombine(N, DAG, DCI, Subtarget); 4410 case LoongArchISD::BITREV_W: 4411 return performBITREV_WCombine(N, DAG, DCI, Subtarget); 4412 case ISD::INTRINSIC_WO_CHAIN: 4413 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); 4414 } 4415 return SDValue(); 4416 } 4417 4418 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, 4419 MachineBasicBlock *MBB) { 4420 if (!ZeroDivCheck) 4421 return MBB; 4422 4423 // Build instructions: 4424 // MBB: 4425 // div(or mod) $dst, $dividend, $divisor 4426 // bnez $divisor, SinkMBB 4427 // BreakMBB: 4428 // break 7 // BRK_DIVZERO 4429 // SinkMBB: 4430 // fallthrough 4431 const BasicBlock *LLVM_BB = MBB->getBasicBlock(); 4432 MachineFunction::iterator It = ++MBB->getIterator(); 4433 MachineFunction *MF = MBB->getParent(); 4434 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB); 4435 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); 4436 
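// BreakMBB will hold only the BREAK instruction; SinkMBB receives everything
// that followed the division in MBB.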
MF->insert(It, BreakMBB); 4437 MF->insert(It, SinkMBB); 4438 4439 // Transfer the remainder of MBB and its successor edges to SinkMBB. 4440 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end()); 4441 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 4442 4443 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); 4444 DebugLoc DL = MI.getDebugLoc(); 4445 MachineOperand &Divisor = MI.getOperand(2); 4446 Register DivisorReg = Divisor.getReg(); 4447 4448 // MBB: 4449 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ)) 4450 .addReg(DivisorReg, getKillRegState(Divisor.isKill())) 4451 .addMBB(SinkMBB); 4452 MBB->addSuccessor(BreakMBB); 4453 MBB->addSuccessor(SinkMBB); 4454 4455 // BreakMBB: 4456 // See linux header file arch/loongarch/include/uapi/asm/break.h for the 4457 // definition of BRK_DIVZERO. 4458 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/); 4459 BreakMBB->addSuccessor(SinkMBB); 4460 4461 // Clear Divisor's kill flag. 4462 Divisor.setIsKill(false); 4463 4464 return SinkMBB; 4465 } 4466 4467 static MachineBasicBlock * 4468 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, 4469 const LoongArchSubtarget &Subtarget) { 4470 unsigned CondOpc; 4471 switch (MI.getOpcode()) { 4472 default: 4473 llvm_unreachable("Unexpected opcode"); 4474 case LoongArch::PseudoVBZ: 4475 CondOpc = LoongArch::VSETEQZ_V; 4476 break; 4477 case LoongArch::PseudoVBZ_B: 4478 CondOpc = LoongArch::VSETANYEQZ_B; 4479 break; 4480 case LoongArch::PseudoVBZ_H: 4481 CondOpc = LoongArch::VSETANYEQZ_H; 4482 break; 4483 case LoongArch::PseudoVBZ_W: 4484 CondOpc = LoongArch::VSETANYEQZ_W; 4485 break; 4486 case LoongArch::PseudoVBZ_D: 4487 CondOpc = LoongArch::VSETANYEQZ_D; 4488 break; 4489 case LoongArch::PseudoVBNZ: 4490 CondOpc = LoongArch::VSETNEZ_V; 4491 break; 4492 case LoongArch::PseudoVBNZ_B: 4493 CondOpc = LoongArch::VSETALLNEZ_B; 4494 break; 4495 case LoongArch::PseudoVBNZ_H: 4496 CondOpc = LoongArch::VSETALLNEZ_H; 4497 break; 4498 case LoongArch::PseudoVBNZ_W: 4499 CondOpc = LoongArch::VSETALLNEZ_W; 4500 break; 4501 case LoongArch::PseudoVBNZ_D: 4502 CondOpc = LoongArch::VSETALLNEZ_D; 4503 break; 4504 case LoongArch::PseudoXVBZ: 4505 CondOpc = LoongArch::XVSETEQZ_V; 4506 break; 4507 case LoongArch::PseudoXVBZ_B: 4508 CondOpc = LoongArch::XVSETANYEQZ_B; 4509 break; 4510 case LoongArch::PseudoXVBZ_H: 4511 CondOpc = LoongArch::XVSETANYEQZ_H; 4512 break; 4513 case LoongArch::PseudoXVBZ_W: 4514 CondOpc = LoongArch::XVSETANYEQZ_W; 4515 break; 4516 case LoongArch::PseudoXVBZ_D: 4517 CondOpc = LoongArch::XVSETANYEQZ_D; 4518 break; 4519 case LoongArch::PseudoXVBNZ: 4520 CondOpc = LoongArch::XVSETNEZ_V; 4521 break; 4522 case LoongArch::PseudoXVBNZ_B: 4523 CondOpc = LoongArch::XVSETALLNEZ_B; 4524 break; 4525 case LoongArch::PseudoXVBNZ_H: 4526 CondOpc = LoongArch::XVSETALLNEZ_H; 4527 break; 4528 case LoongArch::PseudoXVBNZ_W: 4529 CondOpc = LoongArch::XVSETALLNEZ_W; 4530 break; 4531 case LoongArch::PseudoXVBNZ_D: 4532 CondOpc = LoongArch::XVSETALLNEZ_D; 4533 break; 4534 } 4535 4536 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 4537 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4538 DebugLoc DL = MI.getDebugLoc(); 4539 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 4540 MachineFunction::iterator It = ++BB->getIterator(); 4541 4542 MachineFunction *F = BB->getParent(); 4543 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB); 4544 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB); 4545 MachineBasicBlock *SinkBB = 
F->CreateMachineBasicBlock(LLVM_BB); 4546 4547 F->insert(It, FalseBB); 4548 F->insert(It, TrueBB); 4549 F->insert(It, SinkBB); 4550 4551 // Transfer the remainder of MBB and its successor edges to Sink. 4552 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end()); 4553 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 4554 4555 // Insert the real instruction to BB. 4556 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass); 4557 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg()); 4558 4559 // Insert branch. 4560 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB); 4561 BB->addSuccessor(FalseBB); 4562 BB->addSuccessor(TrueBB); 4563 4564 // FalseBB. 4565 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); 4566 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1) 4567 .addReg(LoongArch::R0) 4568 .addImm(0); 4569 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB); 4570 FalseBB->addSuccessor(SinkBB); 4571 4572 // TrueBB. 4573 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); 4574 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2) 4575 .addReg(LoongArch::R0) 4576 .addImm(1); 4577 TrueBB->addSuccessor(SinkBB); 4578 4579 // SinkBB: merge the results. 4580 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI), 4581 MI.getOperand(0).getReg()) 4582 .addReg(RD1) 4583 .addMBB(FalseBB) 4584 .addReg(RD2) 4585 .addMBB(TrueBB); 4586 4587 // The pseudo instruction is gone now. 4588 MI.eraseFromParent(); 4589 return SinkBB; 4590 } 4591 4592 static MachineBasicBlock * 4593 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, 4594 const LoongArchSubtarget &Subtarget) { 4595 unsigned InsOp; 4596 unsigned HalfSize; 4597 switch (MI.getOpcode()) { 4598 default: 4599 llvm_unreachable("Unexpected opcode"); 4600 case LoongArch::PseudoXVINSGR2VR_B: 4601 HalfSize = 16; 4602 InsOp = LoongArch::VINSGR2VR_B; 4603 break; 4604 case LoongArch::PseudoXVINSGR2VR_H: 4605 HalfSize = 8; 4606 InsOp = LoongArch::VINSGR2VR_H; 4607 break; 4608 } 4609 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 4610 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; 4611 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; 4612 DebugLoc DL = MI.getDebugLoc(); 4613 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 4614 // XDst = vector_insert XSrc, Elt, Idx 4615 Register XDst = MI.getOperand(0).getReg(); 4616 Register XSrc = MI.getOperand(1).getReg(); 4617 Register Elt = MI.getOperand(2).getReg(); 4618 unsigned Idx = MI.getOperand(3).getImm(); 4619 4620 Register ScratchReg1 = XSrc; 4621 if (Idx >= HalfSize) { 4622 ScratchReg1 = MRI.createVirtualRegister(RC); 4623 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1) 4624 .addReg(XSrc) 4625 .addReg(XSrc) 4626 .addImm(1); 4627 } 4628 4629 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC); 4630 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC); 4631 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1) 4632 .addReg(ScratchReg1, 0, LoongArch::sub_128); 4633 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2) 4634 .addReg(ScratchSubReg1) 4635 .addReg(Elt) 4636 .addImm(Idx >= HalfSize ? 
Idx - HalfSize : Idx); 4637 4638 Register ScratchReg2 = XDst; 4639 if (Idx >= HalfSize) 4640 ScratchReg2 = MRI.createVirtualRegister(RC); 4641 4642 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2) 4643 .addImm(0) 4644 .addReg(ScratchSubReg2) 4645 .addImm(LoongArch::sub_128); 4646 4647 if (Idx >= HalfSize) 4648 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst) 4649 .addReg(XSrc) 4650 .addReg(ScratchReg2) 4651 .addImm(2); 4652 4653 MI.eraseFromParent(); 4654 return BB; 4655 } 4656 4657 static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI, 4658 MachineBasicBlock *BB, 4659 const LoongArchSubtarget &Subtarget) { 4660 assert(Subtarget.hasExtLSX()); 4661 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 4662 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass; 4663 DebugLoc DL = MI.getDebugLoc(); 4664 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 4665 Register Dst = MI.getOperand(0).getReg(); 4666 Register Src = MI.getOperand(1).getReg(); 4667 Register ScratchReg1 = MRI.createVirtualRegister(RC); 4668 Register ScratchReg2 = MRI.createVirtualRegister(RC); 4669 Register ScratchReg3 = MRI.createVirtualRegister(RC); 4670 4671 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0); 4672 BuildMI(*BB, MI, DL, 4673 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D 4674 : LoongArch::VINSGR2VR_W), 4675 ScratchReg2) 4676 .addReg(ScratchReg1) 4677 .addReg(Src) 4678 .addImm(0); 4679 BuildMI( 4680 *BB, MI, DL, 4681 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W), 4682 ScratchReg3) 4683 .addReg(ScratchReg2); 4684 BuildMI(*BB, MI, DL, 4685 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D 4686 : LoongArch::VPICKVE2GR_W), 4687 Dst) 4688 .addReg(ScratchReg3) 4689 .addImm(0); 4690 4691 MI.eraseFromParent(); 4692 return BB; 4693 } 4694 4695 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( 4696 MachineInstr &MI, MachineBasicBlock *BB) const { 4697 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 4698 DebugLoc DL = MI.getDebugLoc(); 4699 4700 switch (MI.getOpcode()) { 4701 default: 4702 llvm_unreachable("Unexpected instr type to insert"); 4703 case LoongArch::DIV_W: 4704 case LoongArch::DIV_WU: 4705 case LoongArch::MOD_W: 4706 case LoongArch::MOD_WU: 4707 case LoongArch::DIV_D: 4708 case LoongArch::DIV_DU: 4709 case LoongArch::MOD_D: 4710 case LoongArch::MOD_DU: 4711 return insertDivByZeroTrap(MI, BB); 4712 break; 4713 case LoongArch::WRFCSR: { 4714 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR), 4715 LoongArch::FCSR0 + MI.getOperand(0).getImm()) 4716 .addReg(MI.getOperand(1).getReg()); 4717 MI.eraseFromParent(); 4718 return BB; 4719 } 4720 case LoongArch::RDFCSR: { 4721 MachineInstr *ReadFCSR = 4722 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR), 4723 MI.getOperand(0).getReg()) 4724 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm()); 4725 ReadFCSR->getOperand(1).setIsUndef(); 4726 MI.eraseFromParent(); 4727 return BB; 4728 } 4729 case LoongArch::PseudoVBZ: 4730 case LoongArch::PseudoVBZ_B: 4731 case LoongArch::PseudoVBZ_H: 4732 case LoongArch::PseudoVBZ_W: 4733 case LoongArch::PseudoVBZ_D: 4734 case LoongArch::PseudoVBNZ: 4735 case LoongArch::PseudoVBNZ_B: 4736 case LoongArch::PseudoVBNZ_H: 4737 case LoongArch::PseudoVBNZ_W: 4738 case LoongArch::PseudoVBNZ_D: 4739 case LoongArch::PseudoXVBZ: 4740 case LoongArch::PseudoXVBZ_B: 4741 case LoongArch::PseudoXVBZ_H: 4742 case LoongArch::PseudoXVBZ_W: 4743 case LoongArch::PseudoXVBZ_D: 4744 case LoongArch::PseudoXVBNZ: 
4745 case LoongArch::PseudoXVBNZ_B: 4746 case LoongArch::PseudoXVBNZ_H: 4747 case LoongArch::PseudoXVBNZ_W: 4748 case LoongArch::PseudoXVBNZ_D: 4749 return emitVecCondBranchPseudo(MI, BB, Subtarget); 4750 case LoongArch::PseudoXVINSGR2VR_B: 4751 case LoongArch::PseudoXVINSGR2VR_H: 4752 return emitPseudoXVINSGR2VR(MI, BB, Subtarget); 4753 case LoongArch::PseudoCTPOP: 4754 return emitPseudoCTPOP(MI, BB, Subtarget); 4755 case TargetOpcode::STATEPOINT: 4756 // STATEPOINT is a pseudo instruction which has no implicit defs/uses 4757 // while bl call instruction (where statepoint will be lowered at the 4758 // end) has implicit def. This def is early-clobber as it will be set at 4759 // the moment of the call and earlier than any use is read. 4760 // Add this implicit dead def here as a workaround. 4761 MI.addOperand(*MI.getMF(), 4762 MachineOperand::CreateReg( 4763 LoongArch::R1, /*isDef*/ true, 4764 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true, 4765 /*isUndef*/ false, /*isEarlyClobber*/ true)); 4766 if (!Subtarget.is64Bit()) 4767 report_fatal_error("STATEPOINT is only supported on 64-bit targets"); 4768 return emitPatchPoint(MI, BB); 4769 } 4770 } 4771 4772 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( 4773 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 4774 unsigned *Fast) const { 4775 if (!Subtarget.hasUAL()) 4776 return false; 4777 4778 // TODO: set reasonable speed number. 4779 if (Fast) 4780 *Fast = 1; 4781 return true; 4782 } 4783 4784 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { 4785 switch ((LoongArchISD::NodeType)Opcode) { 4786 case LoongArchISD::FIRST_NUMBER: 4787 break; 4788 4789 #define NODE_NAME_CASE(node) \ 4790 case LoongArchISD::node: \ 4791 return "LoongArchISD::" #node; 4792 4793 // TODO: Add more target-dependent nodes later. 
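  // For example, NODE_NAME_CASE(CALL) expands to:
  //   case LoongArchISD::CALL:
  //     return "LoongArchISD::CALL";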
4794 NODE_NAME_CASE(CALL) 4795 NODE_NAME_CASE(CALL_MEDIUM) 4796 NODE_NAME_CASE(CALL_LARGE) 4797 NODE_NAME_CASE(RET) 4798 NODE_NAME_CASE(TAIL) 4799 NODE_NAME_CASE(TAIL_MEDIUM) 4800 NODE_NAME_CASE(TAIL_LARGE) 4801 NODE_NAME_CASE(SLL_W) 4802 NODE_NAME_CASE(SRA_W) 4803 NODE_NAME_CASE(SRL_W) 4804 NODE_NAME_CASE(BSTRINS) 4805 NODE_NAME_CASE(BSTRPICK) 4806 NODE_NAME_CASE(MOVGR2FR_W_LA64) 4807 NODE_NAME_CASE(MOVFR2GR_S_LA64) 4808 NODE_NAME_CASE(FTINT) 4809 NODE_NAME_CASE(REVB_2H) 4810 NODE_NAME_CASE(REVB_2W) 4811 NODE_NAME_CASE(BITREV_4B) 4812 NODE_NAME_CASE(BITREV_8B) 4813 NODE_NAME_CASE(BITREV_W) 4814 NODE_NAME_CASE(ROTR_W) 4815 NODE_NAME_CASE(ROTL_W) 4816 NODE_NAME_CASE(DIV_W) 4817 NODE_NAME_CASE(DIV_WU) 4818 NODE_NAME_CASE(MOD_W) 4819 NODE_NAME_CASE(MOD_WU) 4820 NODE_NAME_CASE(CLZ_W) 4821 NODE_NAME_CASE(CTZ_W) 4822 NODE_NAME_CASE(DBAR) 4823 NODE_NAME_CASE(IBAR) 4824 NODE_NAME_CASE(BREAK) 4825 NODE_NAME_CASE(SYSCALL) 4826 NODE_NAME_CASE(CRC_W_B_W) 4827 NODE_NAME_CASE(CRC_W_H_W) 4828 NODE_NAME_CASE(CRC_W_W_W) 4829 NODE_NAME_CASE(CRC_W_D_W) 4830 NODE_NAME_CASE(CRCC_W_B_W) 4831 NODE_NAME_CASE(CRCC_W_H_W) 4832 NODE_NAME_CASE(CRCC_W_W_W) 4833 NODE_NAME_CASE(CRCC_W_D_W) 4834 NODE_NAME_CASE(CSRRD) 4835 NODE_NAME_CASE(CSRWR) 4836 NODE_NAME_CASE(CSRXCHG) 4837 NODE_NAME_CASE(IOCSRRD_B) 4838 NODE_NAME_CASE(IOCSRRD_H) 4839 NODE_NAME_CASE(IOCSRRD_W) 4840 NODE_NAME_CASE(IOCSRRD_D) 4841 NODE_NAME_CASE(IOCSRWR_B) 4842 NODE_NAME_CASE(IOCSRWR_H) 4843 NODE_NAME_CASE(IOCSRWR_W) 4844 NODE_NAME_CASE(IOCSRWR_D) 4845 NODE_NAME_CASE(CPUCFG) 4846 NODE_NAME_CASE(MOVGR2FCSR) 4847 NODE_NAME_CASE(MOVFCSR2GR) 4848 NODE_NAME_CASE(CACOP_D) 4849 NODE_NAME_CASE(CACOP_W) 4850 NODE_NAME_CASE(VSHUF) 4851 NODE_NAME_CASE(VPICKEV) 4852 NODE_NAME_CASE(VPICKOD) 4853 NODE_NAME_CASE(VPACKEV) 4854 NODE_NAME_CASE(VPACKOD) 4855 NODE_NAME_CASE(VILVL) 4856 NODE_NAME_CASE(VILVH) 4857 NODE_NAME_CASE(VSHUF4I) 4858 NODE_NAME_CASE(VREPLVEI) 4859 NODE_NAME_CASE(VREPLGR2VR) 4860 NODE_NAME_CASE(XVPERMI) 4861 NODE_NAME_CASE(VPICK_SEXT_ELT) 4862 NODE_NAME_CASE(VPICK_ZEXT_ELT) 4863 NODE_NAME_CASE(VREPLVE) 4864 NODE_NAME_CASE(VALL_ZERO) 4865 NODE_NAME_CASE(VANY_ZERO) 4866 NODE_NAME_CASE(VALL_NONZERO) 4867 NODE_NAME_CASE(VANY_NONZERO) 4868 NODE_NAME_CASE(FRECIPE) 4869 NODE_NAME_CASE(FRSQRTE) 4870 } 4871 #undef NODE_NAME_CASE 4872 return nullptr; 4873 } 4874 4875 //===----------------------------------------------------------------------===// 4876 // Calling Convention Implementation 4877 //===----------------------------------------------------------------------===// 4878 4879 // Eight general-purpose registers a0-a7 used for passing integer arguments, 4880 // with a0-a1 reused to return values. Generally, the GPRs are used to pass 4881 // fixed-point arguments, and floating-point arguments when no FPR is available 4882 // or with soft float ABI. 4883 const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, 4884 LoongArch::R7, LoongArch::R8, LoongArch::R9, 4885 LoongArch::R10, LoongArch::R11}; 4886 // Eight floating-point registers fa0-fa7 used for passing floating-point 4887 // arguments, and fa0-fa1 are also used to return values. 4888 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, 4889 LoongArch::F3, LoongArch::F4, LoongArch::F5, 4890 LoongArch::F6, LoongArch::F7}; 4891 // FPR32 and FPR64 alias each other. 
4892 const MCPhysReg ArgFPR64s[] = { 4893 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, 4894 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; 4895 4896 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, 4897 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, 4898 LoongArch::VR6, LoongArch::VR7}; 4899 4900 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, 4901 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, 4902 LoongArch::XR6, LoongArch::XR7}; 4903 4904 // Pass a 2*GRLen argument that has been split into two GRLen values through 4905 // registers or the stack as necessary. 4906 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, 4907 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, 4908 unsigned ValNo2, MVT ValVT2, MVT LocVT2, 4909 ISD::ArgFlagsTy ArgFlags2) { 4910 unsigned GRLenInBytes = GRLen / 8; 4911 if (Register Reg = State.AllocateReg(ArgGPRs)) { 4912 // At least one half can be passed via register. 4913 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 4914 VA1.getLocVT(), CCValAssign::Full)); 4915 } else { 4916 // Both halves must be passed on the stack, with proper alignment. 4917 Align StackAlign = 4918 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 4919 State.addLoc( 4920 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 4921 State.AllocateStack(GRLenInBytes, StackAlign), 4922 VA1.getLocVT(), CCValAssign::Full)); 4923 State.addLoc(CCValAssign::getMem( 4924 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)), 4925 LocVT2, CCValAssign::Full)); 4926 return false; 4927 } 4928 if (Register Reg = State.AllocateReg(ArgGPRs)) { 4929 // The second half can also be passed via register. 4930 State.addLoc( 4931 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 4932 } else { 4933 // The second half is passed via the stack, without additional alignment. 4934 State.addLoc(CCValAssign::getMem( 4935 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)), 4936 LocVT2, CCValAssign::Full)); 4937 } 4938 return false; 4939 } 4940 4941 // Implements the LoongArch calling convention. Returns true upon failure. 4942 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, 4943 unsigned ValNo, MVT ValVT, 4944 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, 4945 CCState &State, bool IsFixed, bool IsRet, 4946 Type *OrigTy) { 4947 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits(); 4948 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen"); 4949 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64; 4950 MVT LocVT = ValVT; 4951 4952 // Any return value split into more than two values can't be returned 4953 // directly. 4954 if (IsRet && ValNo > 1) 4955 return true; 4956 4957 // If passing a variadic argument, or if no FPR is available. 4958 bool UseGPRForFloat = true; 4959 4960 switch (ABI) { 4961 default: 4962 llvm_unreachable("Unexpected ABI"); 4963 break; 4964 case LoongArchABI::ABI_ILP32F: 4965 case LoongArchABI::ABI_LP64F: 4966 case LoongArchABI::ABI_ILP32D: 4967 case LoongArchABI::ABI_LP64D: 4968 UseGPRForFloat = !IsFixed; 4969 break; 4970 case LoongArchABI::ABI_ILP32S: 4971 case LoongArchABI::ABI_LP64S: 4972 break; 4973 } 4974 4975 // FPR32 and FPR64 alias each other. 
4976 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) 4977 UseGPRForFloat = true; 4978 4979 if (UseGPRForFloat && ValVT == MVT::f32) { 4980 LocVT = GRLenVT; 4981 LocInfo = CCValAssign::BCvt; 4982 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) { 4983 LocVT = MVT::i64; 4984 LocInfo = CCValAssign::BCvt; 4985 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) { 4986 // TODO: Handle passing f64 on LA32 with D feature. 4987 report_fatal_error("Passing f64 with GPR on LA32 is undefined"); 4988 } 4989 4990 // If this is a variadic argument, the LoongArch calling convention requires 4991 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8 4992 // byte alignment. An aligned register should be used regardless of whether 4993 // the original argument was split during legalisation or not. The argument 4994 // will not be passed by registers if the original type is larger than 4995 // 2*GRLen, so the register alignment rule does not apply. 4996 unsigned TwoGRLenInBytes = (2 * GRLen) / 8; 4997 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes && 4998 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) { 4999 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 5000 // Skip 'odd' register if necessary. 5001 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) 5002 State.AllocateReg(ArgGPRs); 5003 } 5004 5005 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 5006 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 5007 State.getPendingArgFlags(); 5008 5009 assert(PendingLocs.size() == PendingArgFlags.size() && 5010 "PendingLocs and PendingArgFlags out of sync"); 5011 5012 // Split arguments might be passed indirectly, so keep track of the pending 5013 // values. 5014 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 5015 LocVT = GRLenVT; 5016 LocInfo = CCValAssign::Indirect; 5017 PendingLocs.push_back( 5018 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 5019 PendingArgFlags.push_back(ArgFlags); 5020 if (!ArgFlags.isSplitEnd()) { 5021 return false; 5022 } 5023 } 5024 5025 // If the split argument only had two elements, it should be passed directly 5026 // in registers or on the stack. 5027 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && 5028 PendingLocs.size() <= 2) { 5029 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 5030 // Apply the normal calling convention rules to the first half of the 5031 // split argument. 5032 CCValAssign VA = PendingLocs[0]; 5033 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 5034 PendingLocs.clear(); 5035 PendingArgFlags.clear(); 5036 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT, 5037 ArgFlags); 5038 } 5039 5040 // Allocate to a register if possible, or else a stack slot. 5041 Register Reg; 5042 unsigned StoreSizeBytes = GRLen / 8; 5043 Align StackAlign = Align(GRLen / 8); 5044 5045 if (ValVT == MVT::f32 && !UseGPRForFloat) 5046 Reg = State.AllocateReg(ArgFPR32s); 5047 else if (ValVT == MVT::f64 && !UseGPRForFloat) 5048 Reg = State.AllocateReg(ArgFPR64s); 5049 else if (ValVT.is128BitVector()) 5050 Reg = State.AllocateReg(ArgVRs); 5051 else if (ValVT.is256BitVector()) 5052 Reg = State.AllocateReg(ArgXRs); 5053 else 5054 Reg = State.AllocateReg(ArgGPRs); 5055 5056 unsigned StackOffset = 5057 Reg ? 
0 : State.AllocateStack(StoreSizeBytes, StackAlign); 5058 5059 // If we reach this point and PendingLocs is non-empty, we must be at the 5060 // end of a split argument that must be passed indirectly. 5061 if (!PendingLocs.empty()) { 5062 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 5063 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 5064 for (auto &It : PendingLocs) { 5065 if (Reg) 5066 It.convertToReg(Reg); 5067 else 5068 It.convertToMem(StackOffset); 5069 State.addLoc(It); 5070 } 5071 PendingLocs.clear(); 5072 PendingArgFlags.clear(); 5073 return false; 5074 } 5075 assert((!UseGPRForFloat || LocVT == GRLenVT) && 5076 "Expected an GRLenVT at this stage"); 5077 5078 if (Reg) { 5079 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5080 return false; 5081 } 5082 5083 // When a floating-point value is passed on the stack, no bit-cast is needed. 5084 if (ValVT.isFloatingPoint()) { 5085 LocVT = ValVT; 5086 LocInfo = CCValAssign::Full; 5087 } 5088 5089 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 5090 return false; 5091 } 5092 5093 void LoongArchTargetLowering::analyzeInputArgs( 5094 MachineFunction &MF, CCState &CCInfo, 5095 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, 5096 LoongArchCCAssignFn Fn) const { 5097 FunctionType *FType = MF.getFunction().getFunctionType(); 5098 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 5099 MVT ArgVT = Ins[i].VT; 5100 Type *ArgTy = nullptr; 5101 if (IsRet) 5102 ArgTy = FType->getReturnType(); 5103 else if (Ins[i].isOrigArg()) 5104 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 5105 LoongArchABI::ABI ABI = 5106 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 5107 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags, 5108 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { 5109 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT 5110 << '\n'); 5111 llvm_unreachable(""); 5112 } 5113 } 5114 } 5115 5116 void LoongArchTargetLowering::analyzeOutputArgs( 5117 MachineFunction &MF, CCState &CCInfo, 5118 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 5119 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const { 5120 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 5121 MVT ArgVT = Outs[i].VT; 5122 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 5123 LoongArchABI::ABI ABI = 5124 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 5125 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags, 5126 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { 5127 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT 5128 << "\n"); 5129 llvm_unreachable(""); 5130 } 5131 } 5132 } 5133 5134 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 5135 // values. 
5136 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 5137 const CCValAssign &VA, const SDLoc &DL) { 5138 switch (VA.getLocInfo()) { 5139 default: 5140 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5141 case CCValAssign::Full: 5142 case CCValAssign::Indirect: 5143 break; 5144 case CCValAssign::BCvt: 5145 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 5146 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val); 5147 else 5148 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 5149 break; 5150 } 5151 return Val; 5152 } 5153 5154 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 5155 const CCValAssign &VA, const SDLoc &DL, 5156 const ISD::InputArg &In, 5157 const LoongArchTargetLowering &TLI) { 5158 MachineFunction &MF = DAG.getMachineFunction(); 5159 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5160 EVT LocVT = VA.getLocVT(); 5161 SDValue Val; 5162 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 5163 Register VReg = RegInfo.createVirtualRegister(RC); 5164 RegInfo.addLiveIn(VA.getLocReg(), VReg); 5165 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 5166 5167 // If input is sign extended from 32 bits, note it for the OptW pass. 5168 if (In.isOrigArg()) { 5169 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex()); 5170 if (OrigArg->getType()->isIntegerTy()) { 5171 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); 5172 // An input zero extended from i31 can also be considered sign extended. 5173 if ((BitWidth <= 32 && In.Flags.isSExt()) || 5174 (BitWidth < 32 && In.Flags.isZExt())) { 5175 LoongArchMachineFunctionInfo *LAFI = 5176 MF.getInfo<LoongArchMachineFunctionInfo>(); 5177 LAFI->addSExt32Register(VReg); 5178 } 5179 } 5180 } 5181 5182 return convertLocVTToValVT(DAG, Val, VA, DL); 5183 } 5184 5185 // The caller is responsible for loading the full value if the argument is 5186 // passed with CCValAssign::Indirect. 
5187 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 5188 const CCValAssign &VA, const SDLoc &DL) { 5189 MachineFunction &MF = DAG.getMachineFunction(); 5190 MachineFrameInfo &MFI = MF.getFrameInfo(); 5191 EVT ValVT = VA.getValVT(); 5192 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), 5193 /*IsImmutable=*/true); 5194 SDValue FIN = DAG.getFrameIndex( 5195 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0))); 5196 5197 ISD::LoadExtType ExtType; 5198 switch (VA.getLocInfo()) { 5199 default: 5200 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5201 case CCValAssign::Full: 5202 case CCValAssign::Indirect: 5203 case CCValAssign::BCvt: 5204 ExtType = ISD::NON_EXTLOAD; 5205 break; 5206 } 5207 return DAG.getExtLoad( 5208 ExtType, DL, VA.getLocVT(), Chain, FIN, 5209 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 5210 } 5211 5212 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 5213 const CCValAssign &VA, const SDLoc &DL) { 5214 EVT LocVT = VA.getLocVT(); 5215 5216 switch (VA.getLocInfo()) { 5217 default: 5218 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 5219 case CCValAssign::Full: 5220 break; 5221 case CCValAssign::BCvt: 5222 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 5223 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val); 5224 else 5225 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 5226 break; 5227 } 5228 return Val; 5229 } 5230 5231 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 5232 CCValAssign::LocInfo LocInfo, 5233 ISD::ArgFlagsTy ArgFlags, CCState &State) { 5234 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 5235 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim 5236 // s0 s1 s2 s3 s4 s5 s6 s7 s8 5237 static const MCPhysReg GPRList[] = { 5238 LoongArch::R23, LoongArch::R24, LoongArch::R25, 5239 LoongArch::R26, LoongArch::R27, LoongArch::R28, 5240 LoongArch::R29, LoongArch::R30, LoongArch::R31}; 5241 if (MCRegister Reg = State.AllocateReg(GPRList)) { 5242 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5243 return false; 5244 } 5245 } 5246 5247 if (LocVT == MVT::f32) { 5248 // Pass in STG registers: F1, F2, F3, F4 5249 // fs0,fs1,fs2,fs3 5250 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, 5251 LoongArch::F26, LoongArch::F27}; 5252 if (MCRegister Reg = State.AllocateReg(FPR32List)) { 5253 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5254 return false; 5255 } 5256 } 5257 5258 if (LocVT == MVT::f64) { 5259 // Pass in STG registers: D1, D2, D3, D4 5260 // fs4,fs5,fs6,fs7 5261 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, 5262 LoongArch::F30_64, LoongArch::F31_64}; 5263 if (MCRegister Reg = State.AllocateReg(FPR64List)) { 5264 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 5265 return false; 5266 } 5267 } 5268 5269 report_fatal_error("No registers left in GHC calling convention"); 5270 return true; 5271 } 5272 5273 // Transform physical registers into virtual registers. 
5274 SDValue LoongArchTargetLowering::LowerFormalArguments( 5275 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 5276 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 5277 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 5278 5279 MachineFunction &MF = DAG.getMachineFunction(); 5280 5281 switch (CallConv) { 5282 default: 5283 llvm_unreachable("Unsupported calling convention"); 5284 case CallingConv::C: 5285 case CallingConv::Fast: 5286 break; 5287 case CallingConv::GHC: 5288 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) || 5289 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD)) 5290 report_fatal_error( 5291 "GHC calling convention requires the F and D extensions"); 5292 } 5293 5294 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5295 MVT GRLenVT = Subtarget.getGRLenVT(); 5296 unsigned GRLenInBytes = Subtarget.getGRLen() / 8; 5297 // Used with varargs to acumulate store chains. 5298 std::vector<SDValue> OutChains; 5299 5300 // Assign locations to all of the incoming arguments. 5301 SmallVector<CCValAssign> ArgLocs; 5302 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 5303 5304 if (CallConv == CallingConv::GHC) 5305 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC); 5306 else 5307 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch); 5308 5309 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 5310 CCValAssign &VA = ArgLocs[i]; 5311 SDValue ArgValue; 5312 if (VA.isRegLoc()) 5313 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this); 5314 else 5315 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); 5316 if (VA.getLocInfo() == CCValAssign::Indirect) { 5317 // If the original argument was split and passed by reference, we need to 5318 // load all parts of it here (using the same address). 5319 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 5320 MachinePointerInfo())); 5321 unsigned ArgIndex = Ins[i].OrigArgIndex; 5322 unsigned ArgPartOffset = Ins[i].PartOffset; 5323 assert(ArgPartOffset == 0); 5324 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { 5325 CCValAssign &PartVA = ArgLocs[i + 1]; 5326 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset; 5327 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 5328 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset); 5329 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 5330 MachinePointerInfo())); 5331 ++i; 5332 } 5333 continue; 5334 } 5335 InVals.push_back(ArgValue); 5336 } 5337 5338 if (IsVarArg) { 5339 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs); 5340 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); 5341 const TargetRegisterClass *RC = &LoongArch::GPRRegClass; 5342 MachineFrameInfo &MFI = MF.getFrameInfo(); 5343 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 5344 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>(); 5345 5346 // Offset of the first variable argument from stack pointer, and size of 5347 // the vararg save area. For now, the varargs save area is either zero or 5348 // large enough to hold a0-a7. 5349 int VaArgOffset, VarArgsSaveSize; 5350 5351 // If all registers are allocated, then all varargs must be passed on the 5352 // stack and we don't need to save any argregs. 
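    // Otherwise the still-unallocated argument registers are saved. As a
    // worked example, with three fixed GPR arguments (Idx == 3) a3-a7 must be
    // saved: VarArgsSaveSize starts as 5 * GRLenInBytes and VaArgOffset as
    // -VarArgsSaveSize; because Idx is odd, the padding slot created further
    // down grows VarArgsSaveSize to 6 * GRLenInBytes, keeping the save area
    // 2*GRLen-aligned.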
5353 if (ArgRegs.size() == Idx) { 5354 VaArgOffset = CCInfo.getStackSize(); 5355 VarArgsSaveSize = 0; 5356 } else { 5357 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx); 5358 VaArgOffset = -VarArgsSaveSize; 5359 } 5360 5361 // Record the frame index of the first variable argument 5362 // which is a value necessary to VASTART. 5363 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true); 5364 LoongArchFI->setVarArgsFrameIndex(FI); 5365 5366 // If saving an odd number of registers then create an extra stack slot to 5367 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures 5368 // offsets to even-numbered registered remain 2*GRLen-aligned. 5369 if (Idx % 2) { 5370 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes, 5371 true); 5372 VarArgsSaveSize += GRLenInBytes; 5373 } 5374 5375 // Copy the integer registers that may have been used for passing varargs 5376 // to the vararg save area. 5377 for (unsigned I = Idx; I < ArgRegs.size(); 5378 ++I, VaArgOffset += GRLenInBytes) { 5379 const Register Reg = RegInfo.createVirtualRegister(RC); 5380 RegInfo.addLiveIn(ArgRegs[I], Reg); 5381 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT); 5382 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true); 5383 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 5384 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, 5385 MachinePointerInfo::getFixedStack(MF, FI)); 5386 cast<StoreSDNode>(Store.getNode()) 5387 ->getMemOperand() 5388 ->setValue((Value *)nullptr); 5389 OutChains.push_back(Store); 5390 } 5391 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize); 5392 } 5393 5394 // All stores are grouped in one node to allow the matching between 5395 // the size of Ins and InVals. This only happens for vararg functions. 5396 if (!OutChains.empty()) { 5397 OutChains.push_back(Chain); 5398 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 5399 } 5400 5401 return Chain; 5402 } 5403 5404 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 5405 return CI->isTailCall(); 5406 } 5407 5408 // Check if the return value is used as only a return value, as otherwise 5409 // we can't perform a tail-call. 5410 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N, 5411 SDValue &Chain) const { 5412 if (N->getNumValues() != 1) 5413 return false; 5414 if (!N->hasNUsesOfValue(1, 0)) 5415 return false; 5416 5417 SDNode *Copy = *N->user_begin(); 5418 if (Copy->getOpcode() != ISD::CopyToReg) 5419 return false; 5420 5421 // If the ISD::CopyToReg has a glue operand, we conservatively assume it 5422 // isn't safe to perform a tail call. 5423 if (Copy->getGluedNode()) 5424 return false; 5425 5426 // The copy must be used by a LoongArchISD::RET, and nothing else. 5427 bool HasRet = false; 5428 for (SDNode *Node : Copy->users()) { 5429 if (Node->getOpcode() != LoongArchISD::RET) 5430 return false; 5431 HasRet = true; 5432 } 5433 5434 if (!HasRet) 5435 return false; 5436 5437 Chain = Copy->getOperand(0); 5438 return true; 5439 } 5440 5441 // Check whether the call is eligible for tail call optimization. 
5442 bool LoongArchTargetLowering::isEligibleForTailCallOptimization( 5443 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 5444 const SmallVectorImpl<CCValAssign> &ArgLocs) const { 5445 5446 auto CalleeCC = CLI.CallConv; 5447 auto &Outs = CLI.Outs; 5448 auto &Caller = MF.getFunction(); 5449 auto CallerCC = Caller.getCallingConv(); 5450 5451 // Do not tail call opt if the stack is used to pass parameters. 5452 if (CCInfo.getStackSize() != 0) 5453 return false; 5454 5455 // Do not tail call opt if any parameters need to be passed indirectly. 5456 for (auto &VA : ArgLocs) 5457 if (VA.getLocInfo() == CCValAssign::Indirect) 5458 return false; 5459 5460 // Do not tail call opt if either caller or callee uses struct return 5461 // semantics. 5462 auto IsCallerStructRet = Caller.hasStructRetAttr(); 5463 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 5464 if (IsCallerStructRet || IsCalleeStructRet) 5465 return false; 5466 5467 // Do not tail call opt if either the callee or caller has a byval argument. 5468 for (auto &Arg : Outs) 5469 if (Arg.Flags.isByVal()) 5470 return false; 5471 5472 // The callee has to preserve all registers the caller needs to preserve. 5473 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); 5474 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 5475 if (CalleeCC != CallerCC) { 5476 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 5477 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 5478 return false; 5479 } 5480 return true; 5481 } 5482 5483 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { 5484 return DAG.getDataLayout().getPrefTypeAlign( 5485 VT.getTypeForEVT(*DAG.getContext())); 5486 } 5487 5488 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 5489 // and output parameter nodes. 5490 SDValue 5491 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, 5492 SmallVectorImpl<SDValue> &InVals) const { 5493 SelectionDAG &DAG = CLI.DAG; 5494 SDLoc &DL = CLI.DL; 5495 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 5496 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 5497 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 5498 SDValue Chain = CLI.Chain; 5499 SDValue Callee = CLI.Callee; 5500 CallingConv::ID CallConv = CLI.CallConv; 5501 bool IsVarArg = CLI.IsVarArg; 5502 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5503 MVT GRLenVT = Subtarget.getGRLenVT(); 5504 bool &IsTailCall = CLI.IsTailCall; 5505 5506 MachineFunction &MF = DAG.getMachineFunction(); 5507 5508 // Analyze the operands of the call, assigning locations to each operand. 5509 SmallVector<CCValAssign> ArgLocs; 5510 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 5511 5512 if (CallConv == CallingConv::GHC) 5513 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC); 5514 else 5515 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch); 5516 5517 // Check if it's really possible to do a tail call. 5518 if (IsTailCall) 5519 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 5520 5521 if (IsTailCall) 5522 ++NumTailCalls; 5523 else if (CLI.CB && CLI.CB->isMustTailCall()) 5524 report_fatal_error("failed to perform tail call elimination on a call " 5525 "site marked musttail"); 5526 5527 // Get a count of how many bytes are to be pushed on the stack. 5528 unsigned NumBytes = ArgCCInfo.getStackSize(); 5529 5530 // Create local copies for byval args. 
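  // A byval argument is passed by copy: the caller materializes a fresh stack
  // object and memcpy's the value into it, so anything the callee writes to
  // its copy never reaches the original object.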
5531 SmallVector<SDValue> ByValArgs; 5532 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 5533 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5534 if (!Flags.isByVal()) 5535 continue; 5536 5537 SDValue Arg = OutVals[i]; 5538 unsigned Size = Flags.getByValSize(); 5539 Align Alignment = Flags.getNonZeroByValAlign(); 5540 5541 int FI = 5542 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 5543 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 5544 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT); 5545 5546 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 5547 /*IsVolatile=*/false, 5548 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt, 5549 MachinePointerInfo(), MachinePointerInfo()); 5550 ByValArgs.push_back(FIPtr); 5551 } 5552 5553 if (!IsTailCall) 5554 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 5555 5556 // Copy argument values to their designated locations. 5557 SmallVector<std::pair<Register, SDValue>> RegsToPass; 5558 SmallVector<SDValue> MemOpChains; 5559 SDValue StackPtr; 5560 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 5561 CCValAssign &VA = ArgLocs[i]; 5562 SDValue ArgValue = OutVals[i]; 5563 ISD::ArgFlagsTy Flags = Outs[i].Flags; 5564 5565 // Promote the value if needed. 5566 // For now, only handle fully promoted and indirect arguments. 5567 if (VA.getLocInfo() == CCValAssign::Indirect) { 5568 // Store the argument in a stack slot and pass its address. 5569 Align StackAlign = 5570 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), 5571 getPrefTypeAlign(ArgValue.getValueType(), DAG)); 5572 TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); 5573 // If the original argument was split and passed by reference, we need to 5574 // store the required parts of it here (and pass just one address). 5575 unsigned ArgIndex = Outs[i].OrigArgIndex; 5576 unsigned ArgPartOffset = Outs[i].PartOffset; 5577 assert(ArgPartOffset == 0); 5578 // Calculate the total size to store. We don't have access to what we're 5579 // actually storing other than performing the loop and collecting the 5580 // info. 5581 SmallVector<std::pair<SDValue, SDValue>> Parts; 5582 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 5583 SDValue PartValue = OutVals[i + 1]; 5584 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; 5585 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 5586 EVT PartVT = PartValue.getValueType(); 5587 5588 StoredSize += PartVT.getStoreSize(); 5589 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); 5590 Parts.push_back(std::make_pair(PartValue, Offset)); 5591 ++i; 5592 } 5593 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); 5594 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 5595 MemOpChains.push_back( 5596 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 5597 MachinePointerInfo::getFixedStack(MF, FI))); 5598 for (const auto &Part : Parts) { 5599 SDValue PartValue = Part.first; 5600 SDValue PartOffset = Part.second; 5601 SDValue Address = 5602 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); 5603 MemOpChains.push_back( 5604 DAG.getStore(Chain, DL, PartValue, Address, 5605 MachinePointerInfo::getFixedStack(MF, FI))); 5606 } 5607 ArgValue = SpillSlot; 5608 } else { 5609 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 5610 } 5611 5612 // Use local copy if it is a byval arg. 
5613 if (Flags.isByVal()) 5614 ArgValue = ByValArgs[j++]; 5615 5616 if (VA.isRegLoc()) { 5617 // Queue up the argument copies and emit them at the end. 5618 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 5619 } else { 5620 assert(VA.isMemLoc() && "Argument not register or memory"); 5621 assert(!IsTailCall && "Tail call not allowed if stack is used " 5622 "for passing parameters"); 5623 5624 // Work out the address of the stack slot. 5625 if (!StackPtr.getNode()) 5626 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT); 5627 SDValue Address = 5628 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 5629 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 5630 5631 // Emit the store. 5632 MemOpChains.push_back( 5633 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 5634 } 5635 } 5636 5637 // Join the stores, which are independent of one another. 5638 if (!MemOpChains.empty()) 5639 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 5640 5641 SDValue Glue; 5642 5643 // Build a sequence of copy-to-reg nodes, chained and glued together. 5644 for (auto &Reg : RegsToPass) { 5645 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 5646 Glue = Chain.getValue(1); 5647 } 5648 5649 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 5650 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 5651 // split it and then direct call can be matched by PseudoCALL. 5652 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 5653 const GlobalValue *GV = S->getGlobal(); 5654 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV) 5655 ? LoongArchII::MO_CALL 5656 : LoongArchII::MO_CALL_PLT; 5657 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags); 5658 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 5659 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr) 5660 ? LoongArchII::MO_CALL 5661 : LoongArchII::MO_CALL_PLT; 5662 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 5663 } 5664 5665 // The first call operand is the chain and the second is the target address. 5666 SmallVector<SDValue> Ops; 5667 Ops.push_back(Chain); 5668 Ops.push_back(Callee); 5669 5670 // Add argument registers to the end of the list so that they are 5671 // known live into the call. 5672 for (auto &Reg : RegsToPass) 5673 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 5674 5675 if (!IsTailCall) { 5676 // Add a register mask operand representing the call-preserved registers. 5677 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 5678 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 5679 assert(Mask && "Missing call preserved mask for calling convention"); 5680 Ops.push_back(DAG.getRegisterMask(Mask)); 5681 } 5682 5683 // Glue the call to the argument copies, if any. 5684 if (Glue.getNode()) 5685 Ops.push_back(Glue); 5686 5687 // Emit the call. 5688 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 5689 unsigned Op; 5690 switch (DAG.getTarget().getCodeModel()) { 5691 default: 5692 report_fatal_error("Unsupported code model"); 5693 case CodeModel::Small: 5694 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; 5695 break; 5696 case CodeModel::Medium: 5697 assert(Subtarget.is64Bit() && "Medium code model requires LA64"); 5698 Op = IsTailCall ? 
LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; 5699 break; 5700 case CodeModel::Large: 5701 assert(Subtarget.is64Bit() && "Large code model requires LA64"); 5702 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; 5703 break; 5704 } 5705 5706 if (IsTailCall) { 5707 MF.getFrameInfo().setHasTailCall(); 5708 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops); 5709 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 5710 return Ret; 5711 } 5712 5713 Chain = DAG.getNode(Op, DL, NodeTys, Ops); 5714 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 5715 Glue = Chain.getValue(1); 5716 5717 // Mark the end of the call, which is glued to the call itself. 5718 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 5719 Glue = Chain.getValue(1); 5720 5721 // Assign locations to each value returned by this call. 5722 SmallVector<CCValAssign> RVLocs; 5723 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 5724 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch); 5725 5726 // Copy all of the result registers out of their specified physreg. 5727 for (auto &VA : RVLocs) { 5728 // Copy the value out. 5729 SDValue RetValue = 5730 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 5731 // Glue the RetValue to the end of the call sequence. 5732 Chain = RetValue.getValue(1); 5733 Glue = RetValue.getValue(2); 5734 5735 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 5736 5737 InVals.push_back(RetValue); 5738 } 5739 5740 return Chain; 5741 } 5742 5743 bool LoongArchTargetLowering::CanLowerReturn( 5744 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 5745 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context, 5746 const Type *RetTy) const { 5747 SmallVector<CCValAssign> RVLocs; 5748 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 5749 5750 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 5751 LoongArchABI::ABI ABI = 5752 MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); 5753 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full, 5754 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, 5755 nullptr)) 5756 return false; 5757 } 5758 return true; 5759 } 5760 5761 SDValue LoongArchTargetLowering::LowerReturn( 5762 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 5763 const SmallVectorImpl<ISD::OutputArg> &Outs, 5764 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, 5765 SelectionDAG &DAG) const { 5766 // Stores the assignment of the return value to a location. 5767 SmallVector<CCValAssign> RVLocs; 5768 5769 // Info about the registers and stack slot. 5770 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 5771 *DAG.getContext()); 5772 5773 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 5774 nullptr, CC_LoongArch); 5775 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 5776 report_fatal_error("GHC functions return void only"); 5777 SDValue Glue; 5778 SmallVector<SDValue, 4> RetOps(1, Chain); 5779 5780 // Copy the result values into the output registers. 5781 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 5782 CCValAssign &VA = RVLocs[i]; 5783 assert(VA.isRegLoc() && "Can only return in registers!"); 5784 5785 // Handle a 'normal' return. 5786 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL); 5787 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 5788 5789 // Guarantee that all emitted copies are stuck together. 
5790 Glue = Chain.getValue(1); 5791 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 5792 } 5793 5794 RetOps[0] = Chain; // Update chain. 5795 5796 // Add the glue node if we have it. 5797 if (Glue.getNode()) 5798 RetOps.push_back(Glue); 5799 5800 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps); 5801 } 5802 5803 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm, 5804 EVT VT) const { 5805 if (!Subtarget.hasExtLSX()) 5806 return false; 5807 5808 if (VT == MVT::f32) { 5809 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff; 5810 return (masked == 0x3e000000 || masked == 0x40000000); 5811 } 5812 5813 if (VT == MVT::f64) { 5814 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff; 5815 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000); 5816 } 5817 5818 return false; 5819 } 5820 5821 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 5822 bool ForCodeSize) const { 5823 // TODO: Maybe need more checks here after vector extension is supported. 5824 if (VT == MVT::f32 && !Subtarget.hasBasicF()) 5825 return false; 5826 if (VT == MVT::f64 && !Subtarget.hasBasicD()) 5827 return false; 5828 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT)); 5829 } 5830 5831 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const { 5832 return true; 5833 } 5834 5835 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const { 5836 return true; 5837 } 5838 5839 bool LoongArchTargetLowering::shouldInsertFencesForAtomic( 5840 const Instruction *I) const { 5841 if (!Subtarget.is64Bit()) 5842 return isa<LoadInst>(I) || isa<StoreInst>(I); 5843 5844 if (isa<LoadInst>(I)) 5845 return true; 5846 5847 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not 5848 // require fences beacuse we can use amswap_db.[w/d]. 5849 Type *Ty = I->getOperand(0)->getType(); 5850 if (isa<StoreInst>(I) && Ty->isIntegerTy()) { 5851 unsigned Size = Ty->getIntegerBitWidth(); 5852 return (Size == 8 || Size == 16); 5853 } 5854 5855 return false; 5856 } 5857 5858 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL, 5859 LLVMContext &Context, 5860 EVT VT) const { 5861 if (!VT.isVector()) 5862 return getPointerTy(DL); 5863 return VT.changeVectorElementTypeToInteger(); 5864 } 5865 5866 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const { 5867 // TODO: Support vectors. 5868 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y); 5869 } 5870 5871 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 5872 const CallInst &I, 5873 MachineFunction &MF, 5874 unsigned Intrinsic) const { 5875 switch (Intrinsic) { 5876 default: 5877 return false; 5878 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32: 5879 case Intrinsic::loongarch_masked_atomicrmw_add_i32: 5880 case Intrinsic::loongarch_masked_atomicrmw_sub_i32: 5881 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: 5882 Info.opc = ISD::INTRINSIC_W_CHAIN; 5883 Info.memVT = MVT::i32; 5884 Info.ptrVal = I.getArgOperand(0); 5885 Info.offset = 0; 5886 Info.align = Align(4); 5887 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 5888 MachineMemOperand::MOVolatile; 5889 return true; 5890 // TODO: Add more Intrinsics later. 
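  // For illustration: the masked atomicrmw calls matched above are the ones
  // emitMaskedAtomicRMWIntrinsic below builds on LA32, with operands
  // (AlignedAddr, Incr, Mask, Ordering). Describing them as a volatile 4-byte
  // load+store at the aligned address (operand 0) keeps the optimizer
  // conservative around the LL/SC expansion.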
5891 } 5892 } 5893 5894 // When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8, 5895 // atomicrmw and/or/xor operations with operands less than 32 bits cannot be 5896 // expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent 5897 // regression, we need to implement it manually. 5898 void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const { 5899 AtomicRMWInst::BinOp Op = AI->getOperation(); 5900 5901 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || 5902 Op == AtomicRMWInst::And) && 5903 "Unable to expand"); 5904 unsigned MinWordSize = 4; 5905 5906 IRBuilder<> Builder(AI); 5907 LLVMContext &Ctx = Builder.getContext(); 5908 const DataLayout &DL = AI->getDataLayout(); 5909 Type *ValueType = AI->getType(); 5910 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8); 5911 5912 Value *Addr = AI->getPointerOperand(); 5913 PointerType *PtrTy = cast<PointerType>(Addr->getType()); 5914 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace()); 5915 5916 Value *AlignedAddr = Builder.CreateIntrinsic( 5917 Intrinsic::ptrmask, {PtrTy, IntTy}, 5918 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr, 5919 "AlignedAddr"); 5920 5921 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy); 5922 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); 5923 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3); 5924 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt"); 5925 Value *Mask = Builder.CreateShl( 5926 ConstantInt::get(WordType, 5927 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1), 5928 ShiftAmt, "Mask"); 5929 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask"); 5930 Value *ValOperand_Shifted = 5931 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType), 5932 ShiftAmt, "ValOperand_Shifted"); 5933 Value *NewOperand; 5934 if (Op == AtomicRMWInst::And) 5935 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand"); 5936 else 5937 NewOperand = ValOperand_Shifted; 5938 5939 AtomicRMWInst *NewAI = 5940 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize), 5941 AI->getOrdering(), AI->getSyncScopeID()); 5942 5943 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted"); 5944 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted"); 5945 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType); 5946 AI->replaceAllUsesWith(FinalOldResult); 5947 AI->eraseFromParent(); 5948 } 5949 5950 TargetLowering::AtomicExpansionKind 5951 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 5952 // TODO: Add more AtomicRMWInst that needs to be extended. 5953 5954 // Since floating-point operation requires a non-trivial set of data 5955 // operations, use CmpXChg to expand. 
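  // For example, `atomicrmw fadd ptr %p, float %x seq_cst` becomes a
  // load / fadd / cmpxchg retry loop in AtomicExpandPass.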
5956 if (AI->isFloatingPointOperation() || 5957 AI->getOperation() == AtomicRMWInst::UIncWrap || 5958 AI->getOperation() == AtomicRMWInst::UDecWrap || 5959 AI->getOperation() == AtomicRMWInst::USubCond || 5960 AI->getOperation() == AtomicRMWInst::USubSat) 5961 return AtomicExpansionKind::CmpXChg; 5962 5963 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() && 5964 (AI->getOperation() == AtomicRMWInst::Xchg || 5965 AI->getOperation() == AtomicRMWInst::Add || 5966 AI->getOperation() == AtomicRMWInst::Sub)) { 5967 return AtomicExpansionKind::None; 5968 } 5969 5970 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 5971 if (Subtarget.hasLAMCAS()) { 5972 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And || 5973 AI->getOperation() == AtomicRMWInst::Or || 5974 AI->getOperation() == AtomicRMWInst::Xor)) 5975 return AtomicExpansionKind::Expand; 5976 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32) 5977 return AtomicExpansionKind::CmpXChg; 5978 } 5979 5980 if (Size == 8 || Size == 16) 5981 return AtomicExpansionKind::MaskedIntrinsic; 5982 return AtomicExpansionKind::None; 5983 } 5984 5985 static Intrinsic::ID 5986 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, 5987 AtomicRMWInst::BinOp BinOp) { 5988 if (GRLen == 64) { 5989 switch (BinOp) { 5990 default: 5991 llvm_unreachable("Unexpected AtomicRMW BinOp"); 5992 case AtomicRMWInst::Xchg: 5993 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64; 5994 case AtomicRMWInst::Add: 5995 return Intrinsic::loongarch_masked_atomicrmw_add_i64; 5996 case AtomicRMWInst::Sub: 5997 return Intrinsic::loongarch_masked_atomicrmw_sub_i64; 5998 case AtomicRMWInst::Nand: 5999 return Intrinsic::loongarch_masked_atomicrmw_nand_i64; 6000 case AtomicRMWInst::UMax: 6001 return Intrinsic::loongarch_masked_atomicrmw_umax_i64; 6002 case AtomicRMWInst::UMin: 6003 return Intrinsic::loongarch_masked_atomicrmw_umin_i64; 6004 case AtomicRMWInst::Max: 6005 return Intrinsic::loongarch_masked_atomicrmw_max_i64; 6006 case AtomicRMWInst::Min: 6007 return Intrinsic::loongarch_masked_atomicrmw_min_i64; 6008 // TODO: support other AtomicRMWInst. 6009 } 6010 } 6011 6012 if (GRLen == 32) { 6013 switch (BinOp) { 6014 default: 6015 llvm_unreachable("Unexpected AtomicRMW BinOp"); 6016 case AtomicRMWInst::Xchg: 6017 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32; 6018 case AtomicRMWInst::Add: 6019 return Intrinsic::loongarch_masked_atomicrmw_add_i32; 6020 case AtomicRMWInst::Sub: 6021 return Intrinsic::loongarch_masked_atomicrmw_sub_i32; 6022 case AtomicRMWInst::Nand: 6023 return Intrinsic::loongarch_masked_atomicrmw_nand_i32; 6024 // TODO: support other AtomicRMWInst. 
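    // E.g. on LA32 without LAMCAS, an i16 `atomicrmw add` is lowered through
    // loongarch_masked_atomicrmw_add_i32 (see emitMaskedAtomicRMWIntrinsic),
    // which ends up as an LL.W/SC.W loop.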
6025 } 6026 } 6027 6028 llvm_unreachable("Unexpected GRLen\n"); 6029 } 6030 6031 TargetLowering::AtomicExpansionKind 6032 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( 6033 AtomicCmpXchgInst *CI) const { 6034 6035 if (Subtarget.hasLAMCAS()) 6036 return AtomicExpansionKind::None; 6037 6038 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 6039 if (Size == 8 || Size == 16) 6040 return AtomicExpansionKind::MaskedIntrinsic; 6041 return AtomicExpansionKind::None; 6042 } 6043 6044 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 6045 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 6046 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 6047 AtomicOrdering FailOrd = CI->getFailureOrdering(); 6048 Value *FailureOrdering = 6049 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd)); 6050 6051 // TODO: Support cmpxchg on LA32. 6052 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; 6053 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 6054 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 6055 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 6056 Type *Tys[] = {AlignedAddr->getType()}; 6057 Value *Result = Builder.CreateIntrinsic( 6058 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); 6059 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 6060 return Result; 6061 } 6062 6063 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic( 6064 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 6065 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 6066 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace 6067 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate 6068 // mask, as this produces better code than the LL/SC loop emitted by 6069 // int_loongarch_masked_atomicrmw_xchg. 6070 if (AI->getOperation() == AtomicRMWInst::Xchg && 6071 isa<ConstantInt>(AI->getValOperand())) { 6072 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand()); 6073 if (CVal->isZero()) 6074 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr, 6075 Builder.CreateNot(Mask, "Inv_Mask"), 6076 AI->getAlign(), Ord); 6077 if (CVal->isMinusOne()) 6078 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask, 6079 AI->getAlign(), Ord); 6080 } 6081 6082 unsigned GRLen = Subtarget.getGRLen(); 6083 Value *Ordering = 6084 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering())); 6085 Type *Tys[] = {AlignedAddr->getType()}; 6086 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration( 6087 AI->getModule(), 6088 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys); 6089 6090 if (GRLen == 64) { 6091 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 6092 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 6093 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 6094 } 6095 6096 Value *Result; 6097 6098 // Must pass the shift amount needed to sign extend the loaded value prior 6099 // to performing a signed comparison for min/max. ShiftAmt is the number of 6100 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which 6101 // is the number of bits to left+right shift the value in order to 6102 // sign-extend. 
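  // For example (illustrative numbers): a masked i8 min/max on LA64 whose
  // byte sits at bit offset ShiftAmt == 16 passes 64 - 16 - 8 == 40, i.e.
  // shift left by 40 and arithmetic-shift right by 40 to sign-extend the
  // 8-bit field before the signed comparison.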
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LlwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (GRLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

Register LoongArchTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R4;
}

Register LoongArchTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R5;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

static int getEstimateRefinementSteps(EVT VT,
                                      const LoongArchSubtarget &Subtarget) {
  // The FRECIPE estimate instructions have a relative accuracy of 2^-14.
  // IEEE float has 23 mantissa bits and double has 52, so one refinement step
  // suffices for float and two are needed for double.
  int RefinementSteps = VT.getScalarType() == MVT::f64 ?
2 : 1; 6159 return RefinementSteps; 6160 } 6161 6162 SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand, 6163 SelectionDAG &DAG, int Enabled, 6164 int &RefinementSteps, 6165 bool &UseOneConstNR, 6166 bool Reciprocal) const { 6167 if (Subtarget.hasFrecipe()) { 6168 SDLoc DL(Operand); 6169 EVT VT = Operand.getValueType(); 6170 6171 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) || 6172 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) || 6173 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) || 6174 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) || 6175 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) { 6176 6177 if (RefinementSteps == ReciprocalEstimate::Unspecified) 6178 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); 6179 6180 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand); 6181 if (Reciprocal) 6182 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate); 6183 6184 return Estimate; 6185 } 6186 } 6187 6188 return SDValue(); 6189 } 6190 6191 SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand, 6192 SelectionDAG &DAG, 6193 int Enabled, 6194 int &RefinementSteps) const { 6195 if (Subtarget.hasFrecipe()) { 6196 SDLoc DL(Operand); 6197 EVT VT = Operand.getValueType(); 6198 6199 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) || 6200 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) || 6201 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) || 6202 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) || 6203 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) { 6204 6205 if (RefinementSteps == ReciprocalEstimate::Unspecified) 6206 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); 6207 6208 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand); 6209 } 6210 } 6211 6212 return SDValue(); 6213 } 6214 6215 //===----------------------------------------------------------------------===// 6216 // LoongArch Inline Assembly Support 6217 //===----------------------------------------------------------------------===// 6218 6219 LoongArchTargetLowering::ConstraintType 6220 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { 6221 // LoongArch specific constraints in GCC: config/loongarch/constraints.md 6222 // 6223 // 'f': A floating-point register (if available). 6224 // 'k': A memory operand whose address is formed by a base register and 6225 // (optionally scaled) index register. 6226 // 'l': A signed 16-bit constant. 6227 // 'm': A memory operand whose address is formed by a base register and 6228 // offset that is suitable for use in instructions with the same 6229 // addressing mode as st.w and ld.w. 6230 // 'I': A signed 12-bit constant (for arithmetic instructions). 6231 // 'J': Integer zero. 6232 // 'K': An unsigned 12-bit constant (for logic instructions). 6233 // "ZB": An address that is held in a general-purpose register. The offset is 6234 // zero. 6235 // "ZC": A memory operand whose address is formed by a base register and 6236 // offset that is suitable for use in instructions with the same 6237 // addressing mode as ll.w and sc.w. 6238 if (Constraint.size() == 1) { 6239 switch (Constraint[0]) { 6240 default: 6241 break; 6242 case 'f': 6243 return C_RegisterClass; 6244 case 'l': 6245 case 'I': 6246 case 'J': 6247 case 'K': 6248 return C_Immediate; 6249 case 'k': 6250 return C_Memory; 6251 } 6252 } 6253 6254 if (Constraint == "ZC" || Constraint == "ZB") 6255 return C_Memory; 6256 6257 // 'm' is handled here. 
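  // Illustrative (assumed user code, only showing how the memory constraints
  // documented above are typically written):
  //   asm volatile("ll.w %0, %1" : "=&r"(tmp) : "ZC"(*ptr));
  // Here "ZC" promises an address form that ll.w/sc.w can encode.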
6258 return TargetLowering::getConstraintType(Constraint); 6259 } 6260 6261 InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint( 6262 StringRef ConstraintCode) const { 6263 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode) 6264 .Case("k", InlineAsm::ConstraintCode::k) 6265 .Case("ZB", InlineAsm::ConstraintCode::ZB) 6266 .Case("ZC", InlineAsm::ConstraintCode::ZC) 6267 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode)); 6268 } 6269 6270 std::pair<unsigned, const TargetRegisterClass *> 6271 LoongArchTargetLowering::getRegForInlineAsmConstraint( 6272 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 6273 // First, see if this is a constraint that directly corresponds to a LoongArch 6274 // register class. 6275 if (Constraint.size() == 1) { 6276 switch (Constraint[0]) { 6277 case 'r': 6278 // TODO: Support fixed vectors up to GRLen? 6279 if (VT.isVector()) 6280 break; 6281 return std::make_pair(0U, &LoongArch::GPRRegClass); 6282 case 'f': 6283 if (Subtarget.hasBasicF() && VT == MVT::f32) 6284 return std::make_pair(0U, &LoongArch::FPR32RegClass); 6285 if (Subtarget.hasBasicD() && VT == MVT::f64) 6286 return std::make_pair(0U, &LoongArch::FPR64RegClass); 6287 if (Subtarget.hasExtLSX() && 6288 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT)) 6289 return std::make_pair(0U, &LoongArch::LSX128RegClass); 6290 if (Subtarget.hasExtLASX() && 6291 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT)) 6292 return std::make_pair(0U, &LoongArch::LASX256RegClass); 6293 break; 6294 default: 6295 break; 6296 } 6297 } 6298 6299 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen 6300 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm 6301 // constraints while the official register name is prefixed with a '$'. So we 6302 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.) 6303 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is 6304 // case insensitive, so no need to convert the constraint to upper case here. 6305 // 6306 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly 6307 // decode the usage of register name aliases into their official names. And 6308 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use 6309 // official register names. 6310 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") || 6311 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) { 6312 bool IsFP = Constraint[2] == 'f'; 6313 std::pair<StringRef, StringRef> Temp = Constraint.split('$'); 6314 std::pair<unsigned, const TargetRegisterClass *> R; 6315 R = TargetLowering::getRegForInlineAsmConstraint( 6316 TRI, join_items("", Temp.first, Temp.second), VT); 6317 // Match those names to the widest floating point register type available. 6318 if (IsFP) { 6319 unsigned RegNo = R.first; 6320 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) { 6321 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) { 6322 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64; 6323 return std::make_pair(DReg, &LoongArch::FPR64RegClass); 6324 } 6325 } 6326 } 6327 return R; 6328 } 6329 6330 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 6331 } 6332 6333 void LoongArchTargetLowering::LowerAsmOperandForConstraint( 6334 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, 6335 SelectionDAG &DAG) const { 6336 // Currently only support length 1 constraints. 
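  // For example (illustrative): with
  //   asm volatile("addi.w %0, %1, %2" : "=r"(res) : "r"(src), "I"(-100));
  // the 'I' operand is accepted below only if it fits in a signed 12-bit
  // immediate; otherwise no operand is pushed and the constraint is rejected.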
6337 if (Constraint.size() == 1) { 6338 switch (Constraint[0]) { 6339 case 'l': 6340 // Validate & create a 16-bit signed immediate operand. 6341 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 6342 uint64_t CVal = C->getSExtValue(); 6343 if (isInt<16>(CVal)) 6344 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op), 6345 Subtarget.getGRLenVT())); 6346 } 6347 return; 6348 case 'I': 6349 // Validate & create a 12-bit signed immediate operand. 6350 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 6351 uint64_t CVal = C->getSExtValue(); 6352 if (isInt<12>(CVal)) 6353 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op), 6354 Subtarget.getGRLenVT())); 6355 } 6356 return; 6357 case 'J': 6358 // Validate & create an integer zero operand. 6359 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 6360 if (C->getZExtValue() == 0) 6361 Ops.push_back( 6362 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT())); 6363 return; 6364 case 'K': 6365 // Validate & create a 12-bit unsigned immediate operand. 6366 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 6367 uint64_t CVal = C->getZExtValue(); 6368 if (isUInt<12>(CVal)) 6369 Ops.push_back( 6370 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT())); 6371 } 6372 return; 6373 default: 6374 break; 6375 } 6376 } 6377 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 6378 } 6379 6380 #define GET_REGISTER_MATCHER 6381 #include "LoongArchGenAsmMatcher.inc" 6382 6383 Register 6384 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, 6385 const MachineFunction &MF) const { 6386 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$'); 6387 std::string NewRegName = Name.second.str(); 6388 Register Reg = MatchRegisterAltName(NewRegName); 6389 if (Reg == LoongArch::NoRegister) 6390 Reg = MatchRegisterName(NewRegName); 6391 if (Reg == LoongArch::NoRegister) 6392 report_fatal_error( 6393 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 6394 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 6395 if (!ReservedRegs.test(Reg)) 6396 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 6397 StringRef(RegName) + "\".")); 6398 return Reg; 6399 } 6400 6401 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context, 6402 EVT VT, SDValue C) const { 6403 // TODO: Support vectors. 6404 if (!VT.isScalarInteger()) 6405 return false; 6406 6407 // Omit the optimization if the data size exceeds GRLen. 6408 if (VT.getSizeInBits() > Subtarget.getGRLen()) 6409 return false; 6410 6411 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 6412 const APInt &Imm = ConstNode->getAPIntValue(); 6413 // Break MUL into (SLLI + ADD/SUB) or ALSL. 6414 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || 6415 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) 6416 return true; 6417 // Break MUL into (ALSL x, (SLLI x, imm0), imm1). 6418 if (ConstNode->hasOneUse() && 6419 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || 6420 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2())) 6421 return true; 6422 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), 6423 // in which the immediate has two set bits. Or Break (MUL x, imm) 6424 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate 6425 // equals to (1 << s0) - (1 << s1). 6426 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) { 6427 unsigned Shifts = Imm.countr_zero(); 6428 // Reject immediates which can be composed via a single LUI. 
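      // (Illustrative: 0x5000 has 12 trailing zeros and can be materialized
      // by a single lu12i.w, so decomposing the multiplication is not
      // worthwhile.)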
      if (Shifts >= 12)
        return false;
      // Reject multiplications that can be optimized to
      // (SLLI (ALSL x, x, 1/2/3/4), s).
      APInt ImmPop = Imm.ashr(Shifts);
      if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
        return false;
      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
      // since it needs one more instruction than the other three cases.
      APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
      if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
          (ImmSmall - Imm).isPowerOf2())
        return true;
    }
  }

  return false;
}

bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                    const AddrMode &AM,
                                                    Type *Ty, unsigned AS,
                                                    Instruction *I) const {
  // LoongArch has four basic addressing modes:
  // 1. reg
  // 2. reg + 12-bit signed offset
  // 3. reg + 14-bit signed offset left-shifted by 2
  // 4. reg1 + reg2
  // TODO: Add more checks once the vector extensions are supported.

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset, or a 14-bit signed offset left-shifted by
  // 2 when the `UAL` feature is available.
  if (!isInt<12>(AM.BaseOffs) &&
      !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
    return false;

  switch (AM.Scale) {
  case 0:
    // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    // "r+r+i" is not allowed.
    if (AM.HasBaseReg && AM.BaseOffs)
      return false;
    // Otherwise we have "r+r" or "r+i".
    break;
  case 2:
    // "2*r+r" or "2*r+i" is not allowed.
    if (AM.HasBaseReg || AM.BaseOffs)
      return false;
    // Allow "2*r" as "r+r".
    break;
  default:
    return false;
  }

  return true;
}

bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for LA64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
                                                    EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}

bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
  // TODO: Support vectors.
  if (Y.getValueType().isVector())
    return false;

  return !isa<ConstantSDNode>(Y);
}

ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
  // LAMCAS will use amcas[_DB].{b/h/w/d}, which does not require extension.
  return Subtarget.hasLAMCAS() ?
                                   ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
}

bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
    Type *Ty, bool IsSigned) const {
  if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
    return true;

  return IsSigned;
}

bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if a LibCall argument
  // or return value is a float narrower than GRLen on a soft FP ABI.
  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
                                  Type.getSizeInBits() < Subtarget.getGRLen()))
    return false;
  return true;
}

// memcpy and other memory intrinsics typically try to use wider loads/stores
// when the source/dest is aligned and the copy size is large enough. We
// therefore want to align such objects passed to memory intrinsics.
bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
                                                     unsigned &MinSize,
                                                     Align &PrefAlign) const {
  if (!isa<MemIntrinsic>(CI))
    return false;

  if (Subtarget.is64Bit()) {
    MinSize = 8;
    PrefAlign = Align(8);
  } else {
    MinSize = 4;
    PrefAlign = Align(4);
  }

  return true;
}
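// For example (illustrative), with the preference above a 32-byte memcpy from
// a local buffer on LA64 may get the buffer's alignment raised to 8, letting
// the lowering use 64-bit loads/stores instead of narrower accesses.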