15ffd83dbSDimitry Andric //===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric /// \file 95ffd83dbSDimitry Andric /// This file implements the targeting of the Machinelegalizer class for 105ffd83dbSDimitry Andric /// AArch64. 115ffd83dbSDimitry Andric /// \todo This should be generated by TableGen. 125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 135ffd83dbSDimitry Andric 145ffd83dbSDimitry Andric #include "AArch64LegalizerInfo.h" 15fe6060f1SDimitry Andric #include "AArch64RegisterBankInfo.h" 165ffd83dbSDimitry Andric #include "AArch64Subtarget.h" 175f757f3fSDimitry Andric #include "llvm/ADT/STLExtras.h" 185f757f3fSDimitry Andric #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 195ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 20e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 21349cc55cSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 225ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 235ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h" 245ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 255ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 265ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h" 275ffd83dbSDimitry Andric #include "llvm/CodeGen/ValueTypes.h" 285ffd83dbSDimitry Andric #include "llvm/IR/DerivedTypes.h" 29349cc55cSDimitry Andric #include 
"llvm/IR/Intrinsics.h" 30fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsAArch64.h" 315ffd83dbSDimitry Andric #include "llvm/IR/Type.h" 32e8d8bef9SDimitry Andric #include "llvm/Support/MathExtras.h" 33fe6060f1SDimitry Andric #include <initializer_list> 345ffd83dbSDimitry Andric 355ffd83dbSDimitry Andric #define DEBUG_TYPE "aarch64-legalinfo" 365ffd83dbSDimitry Andric 375ffd83dbSDimitry Andric using namespace llvm; 385ffd83dbSDimitry Andric using namespace LegalizeActions; 395ffd83dbSDimitry Andric using namespace LegalizeMutations; 405ffd83dbSDimitry Andric using namespace LegalityPredicates; 41349cc55cSDimitry Andric using namespace MIPatternMatch; 425ffd83dbSDimitry Andric 435ffd83dbSDimitry Andric AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) 445ffd83dbSDimitry Andric : ST(&ST) { 455ffd83dbSDimitry Andric using namespace TargetOpcode; 465ffd83dbSDimitry Andric const LLT p0 = LLT::pointer(0, 64); 475ffd83dbSDimitry Andric const LLT s8 = LLT::scalar(8); 485ffd83dbSDimitry Andric const LLT s16 = LLT::scalar(16); 495ffd83dbSDimitry Andric const LLT s32 = LLT::scalar(32); 505ffd83dbSDimitry Andric const LLT s64 = LLT::scalar(64); 515ffd83dbSDimitry Andric const LLT s128 = LLT::scalar(128); 52fe6060f1SDimitry Andric const LLT v16s8 = LLT::fixed_vector(16, 8); 53fe6060f1SDimitry Andric const LLT v8s8 = LLT::fixed_vector(8, 8); 54fe6060f1SDimitry Andric const LLT v4s8 = LLT::fixed_vector(4, 8); 55*0fca6ea1SDimitry Andric const LLT v2s8 = LLT::fixed_vector(2, 8); 56fe6060f1SDimitry Andric const LLT v8s16 = LLT::fixed_vector(8, 16); 57fe6060f1SDimitry Andric const LLT v4s16 = LLT::fixed_vector(4, 16); 58fe6060f1SDimitry Andric const LLT v2s16 = LLT::fixed_vector(2, 16); 59fe6060f1SDimitry Andric const LLT v2s32 = LLT::fixed_vector(2, 32); 60fe6060f1SDimitry Andric const LLT v4s32 = LLT::fixed_vector(4, 32); 61fe6060f1SDimitry Andric const LLT v2s64 = LLT::fixed_vector(2, 64); 62fe6060f1SDimitry Andric const LLT v2p0 = LLT::fixed_vector(2, p0); 
635ffd83dbSDimitry Andric 64*0fca6ea1SDimitry Andric const LLT nxv16s8 = LLT::scalable_vector(16, s8); 65*0fca6ea1SDimitry Andric const LLT nxv8s16 = LLT::scalable_vector(8, s16); 66*0fca6ea1SDimitry Andric const LLT nxv4s32 = LLT::scalable_vector(4, s32); 67*0fca6ea1SDimitry Andric const LLT nxv2s64 = LLT::scalable_vector(2, s64); 68*0fca6ea1SDimitry Andric 69e8d8bef9SDimitry Andric std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */ 70e8d8bef9SDimitry Andric v16s8, v8s16, v4s32, 71e8d8bef9SDimitry Andric v2s64, v2p0, 72e8d8bef9SDimitry Andric /* End 128bit types */ 73e8d8bef9SDimitry Andric /* Begin 64bit types */ 74e8d8bef9SDimitry Andric v8s8, v4s16, v2s32}; 755f757f3fSDimitry Andric std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0}; 765f757f3fSDimitry Andric SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList); 775f757f3fSDimitry Andric SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList); 78e8d8bef9SDimitry Andric 795ffd83dbSDimitry Andric const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine(); 805ffd83dbSDimitry Andric 815ffd83dbSDimitry Andric // FIXME: support subtargets which have neon/fp-armv8 disabled. 825ffd83dbSDimitry Andric if (!ST.hasNEON() || !ST.hasFPARMv8()) { 83fe6060f1SDimitry Andric getLegacyLegalizerInfo().computeTables(); 845ffd83dbSDimitry Andric return; 855ffd83dbSDimitry Andric } 865ffd83dbSDimitry Andric 87e8d8bef9SDimitry Andric // Some instructions only support s16 if the subtarget has full 16-bit FP 88e8d8bef9SDimitry Andric // support. 89e8d8bef9SDimitry Andric const bool HasFP16 = ST.hasFullFP16(); 90e8d8bef9SDimitry Andric const LLT &MinFPScalar = HasFP16 ? 
s16 : s32; 91e8d8bef9SDimitry Andric 92bdd1243dSDimitry Andric const bool HasCSSC = ST.hasCSSC(); 9306c3fb27SDimitry Andric const bool HasRCPC3 = ST.hasRCPC3(); 94bdd1243dSDimitry Andric 9506c3fb27SDimitry Andric getActionDefinitionsBuilder( 9606c3fb27SDimitry Andric {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER}) 97753f127fSDimitry Andric .legalFor({p0, s8, s16, s32, s64}) 98e8d8bef9SDimitry Andric .legalFor(PackedVectorAllTypeList) 99349cc55cSDimitry Andric .widenScalarToNextPow2(0) 100fe6060f1SDimitry Andric .clampScalar(0, s8, s64) 101*0fca6ea1SDimitry Andric .moreElementsToNextPow2(0) 102*0fca6ea1SDimitry Andric .widenVectorEltsToVectorMinSize(0, 64) 103*0fca6ea1SDimitry Andric .clampNumElements(0, v8s8, v16s8) 104*0fca6ea1SDimitry Andric .clampNumElements(0, v4s16, v8s16) 105*0fca6ea1SDimitry Andric .clampNumElements(0, v2s32, v4s32) 106*0fca6ea1SDimitry Andric .clampNumElements(0, v2s64, v2s64); 1075ffd83dbSDimitry Andric 108349cc55cSDimitry Andric getActionDefinitionsBuilder(G_PHI) 109349cc55cSDimitry Andric .legalFor({p0, s16, s32, s64}) 110e8d8bef9SDimitry Andric .legalFor(PackedVectorAllTypeList) 111349cc55cSDimitry Andric .widenScalarToNextPow2(0) 1125ffd83dbSDimitry Andric .clampScalar(0, s16, s64) 113349cc55cSDimitry Andric // Maximum: sN * k = 128 114349cc55cSDimitry Andric .clampMaxNumElements(0, s8, 16) 115349cc55cSDimitry Andric .clampMaxNumElements(0, s16, 8) 116349cc55cSDimitry Andric .clampMaxNumElements(0, s32, 4) 117349cc55cSDimitry Andric .clampMaxNumElements(0, s64, 2) 118349cc55cSDimitry Andric .clampMaxNumElements(0, p0, 2); 1195ffd83dbSDimitry Andric 1205ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_BSWAP) 121*0fca6ea1SDimitry Andric .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64}) 122*0fca6ea1SDimitry Andric .widenScalarOrEltToNextPow2(0, 16) 123*0fca6ea1SDimitry Andric .clampScalar(0, s32, s64) 124*0fca6ea1SDimitry Andric .clampNumElements(0, v4s16, v8s16) 125*0fca6ea1SDimitry Andric .clampNumElements(0, v2s32, 
v4s32) 126*0fca6ea1SDimitry Andric .clampNumElements(0, v2s64, v2s64) 127*0fca6ea1SDimitry Andric .moreElementsToNextPow2(0); 1285ffd83dbSDimitry Andric 1295ffd83dbSDimitry Andric getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) 1305f757f3fSDimitry Andric .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8}) 1315ffd83dbSDimitry Andric .widenScalarToNextPow2(0) 132349cc55cSDimitry Andric .clampScalar(0, s32, s64) 13306c3fb27SDimitry Andric .clampMaxNumElements(0, s8, 16) 13406c3fb27SDimitry Andric .clampMaxNumElements(0, s16, 8) 1355ffd83dbSDimitry Andric .clampNumElements(0, v2s32, v4s32) 1365ffd83dbSDimitry Andric .clampNumElements(0, v2s64, v2s64) 13706c3fb27SDimitry Andric .minScalarOrEltIf( 13806c3fb27SDimitry Andric [=](const LegalityQuery &Query) { 13906c3fb27SDimitry Andric return Query.Types[0].getNumElements() <= 2; 14006c3fb27SDimitry Andric }, 14106c3fb27SDimitry Andric 0, s32) 14206c3fb27SDimitry Andric .minScalarOrEltIf( 14306c3fb27SDimitry Andric [=](const LegalityQuery &Query) { 14406c3fb27SDimitry Andric return Query.Types[0].getNumElements() <= 4; 14506c3fb27SDimitry Andric }, 14606c3fb27SDimitry Andric 0, s16) 14706c3fb27SDimitry Andric .minScalarOrEltIf( 14806c3fb27SDimitry Andric [=](const LegalityQuery &Query) { 14906c3fb27SDimitry Andric return Query.Types[0].getNumElements() <= 16; 15006c3fb27SDimitry Andric }, 15106c3fb27SDimitry Andric 0, s8) 1525ffd83dbSDimitry Andric .moreElementsToNextPow2(0); 1535ffd83dbSDimitry Andric 154e8d8bef9SDimitry Andric getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) 155eaeb601bSDimitry Andric .customIf([=](const LegalityQuery &Query) { 156eaeb601bSDimitry Andric const auto &SrcTy = Query.Types[0]; 157eaeb601bSDimitry Andric const auto &AmtTy = Query.Types[1]; 158eaeb601bSDimitry Andric return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && 159eaeb601bSDimitry Andric AmtTy.getSizeInBits() == 32; 160eaeb601bSDimitry Andric }) 161e8d8bef9SDimitry Andric .legalFor({ 
162e8d8bef9SDimitry Andric {s32, s32}, 163eaeb601bSDimitry Andric {s32, s64}, 164e8d8bef9SDimitry Andric {s64, s64}, 165e8d8bef9SDimitry Andric {v8s8, v8s8}, 166e8d8bef9SDimitry Andric {v16s8, v16s8}, 167e8d8bef9SDimitry Andric {v4s16, v4s16}, 168e8d8bef9SDimitry Andric {v8s16, v8s16}, 169eaeb601bSDimitry Andric {v2s32, v2s32}, 170eaeb601bSDimitry Andric {v4s32, v4s32}, 171e8d8bef9SDimitry Andric {v2s64, v2s64}, 172e8d8bef9SDimitry Andric }) 173fe6060f1SDimitry Andric .widenScalarToNextPow2(0) 1745ffd83dbSDimitry Andric .clampScalar(1, s32, s64) 1755ffd83dbSDimitry Andric .clampScalar(0, s32, s64) 1767a6dacacSDimitry Andric .clampNumElements(0, v8s8, v16s8) 1777a6dacacSDimitry Andric .clampNumElements(0, v4s16, v8s16) 1785ffd83dbSDimitry Andric .clampNumElements(0, v2s32, v4s32) 1795ffd83dbSDimitry Andric .clampNumElements(0, v2s64, v2s64) 1805ffd83dbSDimitry Andric .moreElementsToNextPow2(0) 1815ffd83dbSDimitry Andric .minScalarSameAs(1, 0); 1825ffd83dbSDimitry Andric 1835ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_PTR_ADD) 1845ffd83dbSDimitry Andric .legalFor({{p0, s64}, {v2p0, v2s64}}) 185*0fca6ea1SDimitry Andric .clampScalarOrElt(1, s64, s64) 186*0fca6ea1SDimitry Andric .clampNumElements(0, v2p0, v2p0); 1875ffd83dbSDimitry Andric 1885ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}}); 1895ffd83dbSDimitry Andric 1905ffd83dbSDimitry Andric getActionDefinitionsBuilder({G_SDIV, G_UDIV}) 1915ffd83dbSDimitry Andric .legalFor({s32, s64}) 1925ffd83dbSDimitry Andric .libcallFor({s128}) 1935ffd83dbSDimitry Andric .clampScalar(0, s32, s64) 1945ffd83dbSDimitry Andric .widenScalarToNextPow2(0) 1955ffd83dbSDimitry Andric .scalarize(0); 1965ffd83dbSDimitry Andric 197fe6060f1SDimitry Andric getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM}) 19881ad6265SDimitry Andric .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32}) 199349cc55cSDimitry Andric .widenScalarOrEltToNextPow2(0) 200349cc55cSDimitry Andric 
.clampScalarOrElt(0, s32, s64) 201349cc55cSDimitry Andric .clampNumElements(0, v2s32, v4s32) 202349cc55cSDimitry Andric .clampNumElements(0, v2s64, v2s64) 203349cc55cSDimitry Andric .moreElementsToNextPow2(0); 2045ffd83dbSDimitry Andric 2055ffd83dbSDimitry Andric 206349cc55cSDimitry Andric getActionDefinitionsBuilder({G_SMULO, G_UMULO}) 207349cc55cSDimitry Andric .widenScalarToNextPow2(0, /*Min = */ 32) 208349cc55cSDimitry Andric .clampScalar(0, s32, s64) 20981ad6265SDimitry Andric .lower(); 210349cc55cSDimitry Andric 211349cc55cSDimitry Andric getActionDefinitionsBuilder({G_SMULH, G_UMULH}) 212349cc55cSDimitry Andric .legalFor({s64, v8s16, v16s8, v4s32}) 213349cc55cSDimitry Andric .lower(); 2145ffd83dbSDimitry Andric 215bdd1243dSDimitry Andric auto &MinMaxActions = getActionDefinitionsBuilder( 216bdd1243dSDimitry Andric {G_SMIN, G_SMAX, G_UMIN, G_UMAX}); 217bdd1243dSDimitry Andric if (HasCSSC) 218bdd1243dSDimitry Andric MinMaxActions 219bdd1243dSDimitry Andric .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) 220bdd1243dSDimitry Andric // Making clamping conditional on CSSC extension as without legal types we 221bdd1243dSDimitry Andric // lower to CMP which can fold one of the two sxtb's we'd otherwise need 222bdd1243dSDimitry Andric // if we detect a type smaller than 32-bit. 
223bdd1243dSDimitry Andric .minScalar(0, s32); 224bdd1243dSDimitry Andric else 225bdd1243dSDimitry Andric MinMaxActions 226bdd1243dSDimitry Andric .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}); 227bdd1243dSDimitry Andric MinMaxActions 228fe6060f1SDimitry Andric .clampNumElements(0, v8s8, v16s8) 229fe6060f1SDimitry Andric .clampNumElements(0, v4s16, v8s16) 230fe6060f1SDimitry Andric .clampNumElements(0, v2s32, v4s32) 231fe6060f1SDimitry Andric // FIXME: This shouldn't be needed as v2s64 types are going to 232fe6060f1SDimitry Andric // be expanded anyway, but G_ICMP doesn't support splitting vectors yet 233fe6060f1SDimitry Andric .clampNumElements(0, v2s64, v2s64) 234fe6060f1SDimitry Andric .lower(); 235fe6060f1SDimitry Andric 236e8d8bef9SDimitry Andric getActionDefinitionsBuilder( 237fe6060f1SDimitry Andric {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO}) 238753f127fSDimitry Andric .legalFor({{s32, s32}, {s64, s32}}) 239fe6060f1SDimitry Andric .clampScalar(0, s32, s64) 240753f127fSDimitry Andric .clampScalar(1, s32, s64) 241fe6060f1SDimitry Andric .widenScalarToNextPow2(0); 2425ffd83dbSDimitry Andric 2435f757f3fSDimitry Andric getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG, 2445f757f3fSDimitry Andric G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM, 2455f757f3fSDimitry Andric G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, 2465f757f3fSDimitry Andric G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC, 2475f757f3fSDimitry Andric G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN}) 2485f757f3fSDimitry Andric .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64}) 2495f757f3fSDimitry Andric .legalIf([=](const LegalityQuery &Query) { 2505f757f3fSDimitry Andric const auto &Ty = Query.Types[0]; 2515f757f3fSDimitry Andric return (Ty == v8s16 || Ty == v4s16) && HasFP16; 2525f757f3fSDimitry Andric }) 2535f757f3fSDimitry Andric .libcallFor({s128}) 2545f757f3fSDimitry Andric .minScalarOrElt(0, MinFPScalar) 2555f757f3fSDimitry Andric .clampNumElements(0, 
v4s16, v8s16) 256e8d8bef9SDimitry Andric .clampNumElements(0, v2s32, v4s32) 2575f757f3fSDimitry Andric .clampNumElements(0, v2s64, v2s64) 2585f757f3fSDimitry Andric .moreElementsToNextPow2(0); 2595ffd83dbSDimitry Andric 2605f757f3fSDimitry Andric getActionDefinitionsBuilder(G_FREM) 2615f757f3fSDimitry Andric .libcallFor({s32, s64}) 2625f757f3fSDimitry Andric .minScalar(0, s32) 2635f757f3fSDimitry Andric .scalarize(0); 2645ffd83dbSDimitry Andric 265*0fca6ea1SDimitry Andric getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT}) 266*0fca6ea1SDimitry Andric .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}}) 267*0fca6ea1SDimitry Andric .libcallFor({{s64, s128}}) 268*0fca6ea1SDimitry Andric .minScalarOrElt(1, MinFPScalar); 2695ffd83dbSDimitry Andric 2705ffd83dbSDimitry Andric getActionDefinitionsBuilder( 271*0fca6ea1SDimitry Andric {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FTAN, G_FEXP, 272*0fca6ea1SDimitry Andric G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH, G_FTANH}) 2735ffd83dbSDimitry Andric // We need a call for these, so we always need to scalarize. 2745ffd83dbSDimitry Andric .scalarize(0) 2755ffd83dbSDimitry Andric // Regardless of FP16 support, widen 16-bit elements to 32-bits. 
2765ffd83dbSDimitry Andric .minScalar(0, s32) 2775f757f3fSDimitry Andric .libcallFor({s32, s64}); 2781db9f3b2SDimitry Andric getActionDefinitionsBuilder(G_FPOWI) 2791db9f3b2SDimitry Andric .scalarize(0) 2801db9f3b2SDimitry Andric .minScalar(0, s32) 2811db9f3b2SDimitry Andric .libcallFor({{s32, s32}, {s64, s32}}); 2825ffd83dbSDimitry Andric 2835ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_INSERT) 284349cc55cSDimitry Andric .legalIf(all(typeInSet(0, {s32, s64, p0}), 285753f127fSDimitry Andric typeInSet(1, {s8, s16, s32}), smallerThan(1, 0))) 2865ffd83dbSDimitry Andric .widenScalarToNextPow2(0) 287349cc55cSDimitry Andric .clampScalar(0, s32, s64) 288349cc55cSDimitry Andric .widenScalarToNextPow2(1) 289349cc55cSDimitry Andric .minScalar(1, s8) 2905ffd83dbSDimitry Andric .maxScalarIf(typeInSet(0, {s32}), 1, s16) 291349cc55cSDimitry Andric .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32); 2925ffd83dbSDimitry Andric 2935ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_EXTRACT) 294349cc55cSDimitry Andric .legalIf(all(typeInSet(0, {s16, s32, s64, p0}), 295349cc55cSDimitry Andric typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1))) 2965ffd83dbSDimitry Andric .widenScalarToNextPow2(1) 297349cc55cSDimitry Andric .clampScalar(1, s32, s128) 298349cc55cSDimitry Andric .widenScalarToNextPow2(0) 299349cc55cSDimitry Andric .minScalar(0, s16) 3005ffd83dbSDimitry Andric .maxScalarIf(typeInSet(1, {s32}), 0, s16) 301349cc55cSDimitry Andric .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32) 302349cc55cSDimitry Andric .maxScalarIf(typeInSet(1, {s128}), 0, s64); 3035ffd83dbSDimitry Andric 304753f127fSDimitry Andric 305753f127fSDimitry Andric for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) { 306753f127fSDimitry Andric auto &Actions = getActionDefinitionsBuilder(Op); 307753f127fSDimitry Andric 308753f127fSDimitry Andric if (Op == G_SEXTLOAD) 309753f127fSDimitry Andric Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered)); 310753f127fSDimitry Andric 
311753f127fSDimitry Andric // Atomics have zero extending behavior. 312753f127fSDimitry Andric Actions 313fe6060f1SDimitry Andric .legalForTypesWithMemDesc({{s32, p0, s8, 8}, 314fe6060f1SDimitry Andric {s32, p0, s16, 8}, 315fe6060f1SDimitry Andric {s32, p0, s32, 8}, 316fe6060f1SDimitry Andric {s64, p0, s8, 2}, 317fe6060f1SDimitry Andric {s64, p0, s16, 2}, 318fe6060f1SDimitry Andric {s64, p0, s32, 4}, 319fe6060f1SDimitry Andric {s64, p0, s64, 8}, 320fe6060f1SDimitry Andric {p0, p0, s64, 8}, 321fe6060f1SDimitry Andric {v2s32, p0, s64, 8}}) 3225ffd83dbSDimitry Andric .widenScalarToNextPow2(0) 323349cc55cSDimitry Andric .clampScalar(0, s32, s64) 3245ffd83dbSDimitry Andric // TODO: We could support sum-of-pow2's but the lowering code doesn't know 3255ffd83dbSDimitry Andric // how to do that yet. 3265ffd83dbSDimitry Andric .unsupportedIfMemSizeNotPow2() 3275ffd83dbSDimitry Andric // Lower anything left over into G_*EXT and G_LOAD 3285ffd83dbSDimitry Andric .lower(); 329753f127fSDimitry Andric } 3305ffd83dbSDimitry Andric 3315ffd83dbSDimitry Andric auto IsPtrVecPred = [=](const LegalityQuery &Query) { 3325ffd83dbSDimitry Andric const LLT &ValTy = Query.Types[0]; 333*0fca6ea1SDimitry Andric return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0; 3345ffd83dbSDimitry Andric }; 3355ffd83dbSDimitry Andric 336*0fca6ea1SDimitry Andric auto &LoadActions = getActionDefinitionsBuilder(G_LOAD); 337*0fca6ea1SDimitry Andric auto &StoreActions = getActionDefinitionsBuilder(G_STORE); 338*0fca6ea1SDimitry Andric 339*0fca6ea1SDimitry Andric if (ST.hasSVE()) { 340*0fca6ea1SDimitry Andric LoadActions.legalForTypesWithMemDesc({ 341*0fca6ea1SDimitry Andric // 128 bit base sizes 342*0fca6ea1SDimitry Andric {nxv16s8, p0, nxv16s8, 8}, 343*0fca6ea1SDimitry Andric {nxv8s16, p0, nxv8s16, 8}, 344*0fca6ea1SDimitry Andric {nxv4s32, p0, nxv4s32, 8}, 345*0fca6ea1SDimitry Andric {nxv2s64, p0, nxv2s64, 8}, 346*0fca6ea1SDimitry Andric }); 347*0fca6ea1SDimitry Andric 348*0fca6ea1SDimitry Andric // 
TODO: Add nxv2p0. Consider bitcastIf. 349*0fca6ea1SDimitry Andric // See #92130 350*0fca6ea1SDimitry Andric // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461 351*0fca6ea1SDimitry Andric StoreActions.legalForTypesWithMemDesc({ 352*0fca6ea1SDimitry Andric // 128 bit base sizes 353*0fca6ea1SDimitry Andric {nxv16s8, p0, nxv16s8, 8}, 354*0fca6ea1SDimitry Andric {nxv8s16, p0, nxv8s16, 8}, 355*0fca6ea1SDimitry Andric {nxv4s32, p0, nxv4s32, 8}, 356*0fca6ea1SDimitry Andric {nxv2s64, p0, nxv2s64, 8}, 357*0fca6ea1SDimitry Andric }); 358*0fca6ea1SDimitry Andric } 359*0fca6ea1SDimitry Andric 360*0fca6ea1SDimitry Andric LoadActions 361349cc55cSDimitry Andric .customIf([=](const LegalityQuery &Query) { 36206c3fb27SDimitry Andric return HasRCPC3 && Query.Types[0] == s128 && 36306c3fb27SDimitry Andric Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire; 36406c3fb27SDimitry Andric }) 36506c3fb27SDimitry Andric .customIf([=](const LegalityQuery &Query) { 366349cc55cSDimitry Andric return Query.Types[0] == s128 && 367349cc55cSDimitry Andric Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic; 368349cc55cSDimitry Andric }) 369fe6060f1SDimitry Andric .legalForTypesWithMemDesc({{s8, p0, s8, 8}, 370fe6060f1SDimitry Andric {s16, p0, s16, 8}, 371fe6060f1SDimitry Andric {s32, p0, s32, 8}, 372fe6060f1SDimitry Andric {s64, p0, s64, 8}, 373fe6060f1SDimitry Andric {p0, p0, s64, 8}, 374fe6060f1SDimitry Andric {s128, p0, s128, 8}, 375fe6060f1SDimitry Andric {v8s8, p0, s64, 8}, 376fe6060f1SDimitry Andric {v16s8, p0, s128, 8}, 377fe6060f1SDimitry Andric {v4s16, p0, s64, 8}, 378fe6060f1SDimitry Andric {v8s16, p0, s128, 8}, 379fe6060f1SDimitry Andric {v2s32, p0, s64, 8}, 380fe6060f1SDimitry Andric {v4s32, p0, s128, 8}, 381fe6060f1SDimitry Andric {v2s64, p0, s128, 8}}) 3825ffd83dbSDimitry Andric // These extends are also legal 3831db9f3b2SDimitry Andric .legalForTypesWithMemDesc( 3841db9f3b2SDimitry Andric {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}}) 
385349cc55cSDimitry Andric .widenScalarToNextPow2(0, /* MinSize = */ 8) 386*0fca6ea1SDimitry Andric .clampMaxNumElements(0, s8, 16) 387*0fca6ea1SDimitry Andric .clampMaxNumElements(0, s16, 8) 388*0fca6ea1SDimitry Andric .clampMaxNumElements(0, s32, 4) 389*0fca6ea1SDimitry Andric .clampMaxNumElements(0, s64, 2) 390*0fca6ea1SDimitry Andric .clampMaxNumElements(0, p0, 2) 39181ad6265SDimitry Andric .lowerIfMemSizeNotByteSizePow2() 392349cc55cSDimitry Andric .clampScalar(0, s8, s64) 39306c3fb27SDimitry Andric .narrowScalarIf( 39406c3fb27SDimitry Andric [=](const LegalityQuery &Query) { 395fe6060f1SDimitry Andric // Clamp extending load results to 32-bits. 396fe6060f1SDimitry Andric return Query.Types[0].isScalar() && 397fe6060f1SDimitry Andric Query.Types[0] != Query.MMODescrs[0].MemoryTy && 398fe6060f1SDimitry Andric Query.Types[0].getSizeInBits() > 32; 399fe6060f1SDimitry Andric }, 400fe6060f1SDimitry Andric changeTo(0, s32)) 401*0fca6ea1SDimitry Andric // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out 402*0fca6ea1SDimitry Andric .bitcastIf(typeInSet(0, {v4s8}), 403*0fca6ea1SDimitry Andric [=](const LegalityQuery &Query) { 404*0fca6ea1SDimitry Andric const LLT VecTy = Query.Types[0]; 405*0fca6ea1SDimitry Andric return std::pair(0, LLT::scalar(VecTy.getSizeInBits())); 406*0fca6ea1SDimitry Andric }) 407fe6060f1SDimitry Andric .customIf(IsPtrVecPred) 408*0fca6ea1SDimitry Andric .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0); 4095ffd83dbSDimitry Andric 410*0fca6ea1SDimitry Andric StoreActions 411349cc55cSDimitry Andric .customIf([=](const LegalityQuery &Query) { 41206c3fb27SDimitry Andric return HasRCPC3 && Query.Types[0] == s128 && 41306c3fb27SDimitry Andric Query.MMODescrs[0].Ordering == AtomicOrdering::Release; 41406c3fb27SDimitry Andric }) 41506c3fb27SDimitry Andric .customIf([=](const LegalityQuery &Query) { 416349cc55cSDimitry Andric return Query.Types[0] == s128 && 417349cc55cSDimitry Andric Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic; 
418349cc55cSDimitry Andric }) 41906c3fb27SDimitry Andric .legalForTypesWithMemDesc( 42006c3fb27SDimitry Andric {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16 421fe6060f1SDimitry Andric {s32, p0, s8, 8}, // truncstorei8 from s32 422fe6060f1SDimitry Andric {s64, p0, s8, 8}, // truncstorei8 from s64 42306c3fb27SDimitry Andric {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32 424fe6060f1SDimitry Andric {s64, p0, s16, 8}, // truncstorei16 from s64 42506c3fb27SDimitry Andric {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8}, 42606c3fb27SDimitry Andric {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64 42706c3fb27SDimitry Andric {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8}, 42806c3fb27SDimitry Andric {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8}, 42906c3fb27SDimitry Andric {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}}) 4305ffd83dbSDimitry Andric .clampScalar(0, s8, s64) 4315ffd83dbSDimitry Andric .lowerIf([=](const LegalityQuery &Query) { 4325ffd83dbSDimitry Andric return Query.Types[0].isScalar() && 433fe6060f1SDimitry Andric Query.Types[0] != Query.MMODescrs[0].MemoryTy; 4345ffd83dbSDimitry Andric }) 435fe6060f1SDimitry Andric // Maximum: sN * k = 128 436fe6060f1SDimitry Andric .clampMaxNumElements(0, s8, 16) 437fe6060f1SDimitry Andric .clampMaxNumElements(0, s16, 8) 438fe6060f1SDimitry Andric .clampMaxNumElements(0, s32, 4) 439fe6060f1SDimitry Andric .clampMaxNumElements(0, s64, 2) 440349cc55cSDimitry Andric .clampMaxNumElements(0, p0, 2) 441fe6060f1SDimitry Andric .lowerIfMemSizeNotPow2() 442*0fca6ea1SDimitry Andric // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out 443*0fca6ea1SDimitry Andric .bitcastIf(typeInSet(0, {v4s8}), 444*0fca6ea1SDimitry Andric [=](const LegalityQuery &Query) { 445*0fca6ea1SDimitry Andric const LLT VecTy = Query.Types[0]; 446*0fca6ea1SDimitry Andric return std::pair(0, LLT::scalar(VecTy.getSizeInBits())); 447*0fca6ea1SDimitry Andric 
}) 448fe6060f1SDimitry Andric .customIf(IsPtrVecPred) 449*0fca6ea1SDimitry Andric .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0); 4505ffd83dbSDimitry Andric 4515f757f3fSDimitry Andric getActionDefinitionsBuilder(G_INDEXED_STORE) 4525f757f3fSDimitry Andric // Idx 0 == Ptr, Idx 1 == Val 4535f757f3fSDimitry Andric // TODO: we can implement legalizations but as of now these are 4545f757f3fSDimitry Andric // generated in a very specific way. 4555f757f3fSDimitry Andric .legalForTypesWithMemDesc({ 4565f757f3fSDimitry Andric {p0, s8, s8, 8}, 4575f757f3fSDimitry Andric {p0, s16, s16, 8}, 4585f757f3fSDimitry Andric {p0, s32, s8, 8}, 4595f757f3fSDimitry Andric {p0, s32, s16, 8}, 4605f757f3fSDimitry Andric {p0, s32, s32, 8}, 4615f757f3fSDimitry Andric {p0, s64, s64, 8}, 4625f757f3fSDimitry Andric {p0, p0, p0, 8}, 4635f757f3fSDimitry Andric {p0, v8s8, v8s8, 8}, 4645f757f3fSDimitry Andric {p0, v16s8, v16s8, 8}, 4655f757f3fSDimitry Andric {p0, v4s16, v4s16, 8}, 4665f757f3fSDimitry Andric {p0, v8s16, v8s16, 8}, 4675f757f3fSDimitry Andric {p0, v2s32, v2s32, 8}, 4685f757f3fSDimitry Andric {p0, v4s32, v4s32, 8}, 4695f757f3fSDimitry Andric {p0, v2s64, v2s64, 8}, 4705f757f3fSDimitry Andric {p0, v2p0, v2p0, 8}, 4715f757f3fSDimitry Andric {p0, s128, s128, 8}, 4725f757f3fSDimitry Andric }) 4735f757f3fSDimitry Andric .unsupported(); 4745f757f3fSDimitry Andric 4755f757f3fSDimitry Andric auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) { 4765f757f3fSDimitry Andric LLT LdTy = Query.Types[0]; 4775f757f3fSDimitry Andric LLT PtrTy = Query.Types[1]; 4787a6dacacSDimitry Andric if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) && 4797a6dacacSDimitry Andric !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128) 4805f757f3fSDimitry Andric return false; 4815f757f3fSDimitry Andric if (PtrTy != p0) 4825f757f3fSDimitry Andric return false; 4835f757f3fSDimitry Andric return true; 4845f757f3fSDimitry Andric }; 4855f757f3fSDimitry Andric getActionDefinitionsBuilder(G_INDEXED_LOAD) 
4865f757f3fSDimitry Andric .unsupportedIf( 4875f757f3fSDimitry Andric atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered)) 4885f757f3fSDimitry Andric .legalIf(IndexedLoadBasicPred) 4895f757f3fSDimitry Andric .unsupported(); 4905f757f3fSDimitry Andric getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD}) 4915f757f3fSDimitry Andric .unsupportedIf( 4925f757f3fSDimitry Andric atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered)) 4935f757f3fSDimitry Andric .legalIf(all(typeInSet(0, {s16, s32, s64}), 4945f757f3fSDimitry Andric LegalityPredicate([=](const LegalityQuery &Q) { 4955f757f3fSDimitry Andric LLT LdTy = Q.Types[0]; 4965f757f3fSDimitry Andric LLT PtrTy = Q.Types[1]; 4975f757f3fSDimitry Andric LLT MemTy = Q.MMODescrs[0].MemoryTy; 4985f757f3fSDimitry Andric if (PtrTy != p0) 4995f757f3fSDimitry Andric return false; 5005f757f3fSDimitry Andric if (LdTy == s16) 5015f757f3fSDimitry Andric return MemTy == s8; 5025f757f3fSDimitry Andric if (LdTy == s32) 5035f757f3fSDimitry Andric return MemTy == s8 || MemTy == s16; 5045f757f3fSDimitry Andric if (LdTy == s64) 5055f757f3fSDimitry Andric return MemTy == s8 || MemTy == s16 || MemTy == s32; 5065f757f3fSDimitry Andric return false; 5075f757f3fSDimitry Andric }))) 5085f757f3fSDimitry Andric .unsupported(); 5095f757f3fSDimitry Andric 5105ffd83dbSDimitry Andric // Constants 5115ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_CONSTANT) 5125ffd83dbSDimitry Andric .legalFor({p0, s8, s16, s32, s64}) 513349cc55cSDimitry Andric .widenScalarToNextPow2(0) 514349cc55cSDimitry Andric .clampScalar(0, s8, s64); 5155ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_FCONSTANT) 516e8d8bef9SDimitry Andric .legalIf([=](const LegalityQuery &Query) { 517e8d8bef9SDimitry Andric const auto &Ty = Query.Types[0]; 518e8d8bef9SDimitry Andric if (HasFP16 && Ty == s16) 519e8d8bef9SDimitry Andric return true; 520e8d8bef9SDimitry Andric return Ty == s32 || Ty == s64 || Ty == s128; 521e8d8bef9SDimitry Andric }) 
522e8d8bef9SDimitry Andric .clampScalar(0, MinFPScalar, s128); 5235ffd83dbSDimitry Andric 5247a6dacacSDimitry Andric // FIXME: fix moreElementsToNextPow2 52506c3fb27SDimitry Andric getActionDefinitionsBuilder(G_ICMP) 526*0fca6ea1SDimitry Andric .legalFor({{s32, s32}, {s32, s64}, {s32, p0}}) 527349cc55cSDimitry Andric .widenScalarOrEltToNextPow2(1) 5285ffd83dbSDimitry Andric .clampScalar(1, s32, s64) 5295ffd83dbSDimitry Andric .clampScalar(0, s32, s32) 5305ffd83dbSDimitry Andric .minScalarEltSameAsIf( 5315ffd83dbSDimitry Andric [=](const LegalityQuery &Query) { 5325ffd83dbSDimitry Andric const LLT &Ty = Query.Types[0]; 5335ffd83dbSDimitry Andric const LLT &SrcTy = Query.Types[1]; 534*0fca6ea1SDimitry Andric return Ty.isVector() && !SrcTy.isPointerVector() && 5355ffd83dbSDimitry Andric Ty.getElementType() != SrcTy.getElementType(); 5365ffd83dbSDimitry Andric }, 5375ffd83dbSDimitry Andric 0, 1) 5385ffd83dbSDimitry Andric .minScalarOrEltIf( 5395ffd83dbSDimitry Andric [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; }, 5405ffd83dbSDimitry Andric 1, s32) 5415ffd83dbSDimitry Andric .minScalarOrEltIf( 5425ffd83dbSDimitry Andric [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0, 5435ffd83dbSDimitry Andric s64) 544*0fca6ea1SDimitry Andric .moreElementsToNextPow2(1) 545*0fca6ea1SDimitry Andric .clampNumElements(1, v8s8, v16s8) 546*0fca6ea1SDimitry Andric .clampNumElements(1, v4s16, v8s16) 547*0fca6ea1SDimitry Andric .clampNumElements(1, v2s32, v4s32) 548*0fca6ea1SDimitry Andric .clampNumElements(1, v2s64, v2s64) 549*0fca6ea1SDimitry Andric .customIf(isVector(0)); 5505ffd83dbSDimitry Andric 55106c3fb27SDimitry Andric getActionDefinitionsBuilder(G_FCMP) 552*0fca6ea1SDimitry Andric .legalFor({{s32, MinFPScalar}, 55306c3fb27SDimitry Andric {s32, s32}, 55406c3fb27SDimitry Andric {s32, s64}, 55506c3fb27SDimitry Andric {v4s32, v4s32}, 55606c3fb27SDimitry Andric {v2s32, v2s32}, 557*0fca6ea1SDimitry Andric {v2s64, v2s64}}) 558*0fca6ea1SDimitry 
Andric .legalIf([=](const LegalityQuery &Query) { 559*0fca6ea1SDimitry Andric const auto &Ty = Query.Types[1]; 560*0fca6ea1SDimitry Andric return (Ty == v8s16 || Ty == v4s16) && Ty == Query.Types[0] && HasFP16; 561*0fca6ea1SDimitry Andric }) 56206c3fb27SDimitry Andric .widenScalarOrEltToNextPow2(1) 56306c3fb27SDimitry Andric .clampScalar(0, s32, s32) 564*0fca6ea1SDimitry Andric .clampScalarOrElt(1, MinFPScalar, s64) 56506c3fb27SDimitry Andric .minScalarEltSameAsIf( 56606c3fb27SDimitry Andric [=](const LegalityQuery &Query) { 56706c3fb27SDimitry Andric const LLT &Ty = Query.Types[0]; 56806c3fb27SDimitry Andric const LLT &SrcTy = Query.Types[1]; 569*0fca6ea1SDimitry Andric return Ty.isVector() && !SrcTy.isPointerVector() && 57006c3fb27SDimitry Andric Ty.getElementType() != SrcTy.getElementType(); 57106c3fb27SDimitry Andric }, 57206c3fb27SDimitry Andric 0, 1) 573*0fca6ea1SDimitry Andric .clampNumElements(1, v4s16, v8s16) 574*0fca6ea1SDimitry Andric .clampNumElements(1, v2s32, v4s32) 575*0fca6ea1SDimitry Andric .clampMaxNumElements(1, s64, 2) 576*0fca6ea1SDimitry Andric .moreElementsToNextPow2(1); 57706c3fb27SDimitry Andric 5785ffd83dbSDimitry Andric // Extensions 5795ffd83dbSDimitry Andric auto ExtLegalFunc = [=](const LegalityQuery &Query) { 5805ffd83dbSDimitry Andric unsigned DstSize = Query.Types[0].getSizeInBits(); 5815ffd83dbSDimitry Andric 5825f757f3fSDimitry Andric // Handle legal vectors using legalFor 5835f757f3fSDimitry Andric if (Query.Types[0].isVector()) 5845ffd83dbSDimitry Andric return false; 5855ffd83dbSDimitry Andric 5865f757f3fSDimitry Andric if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize)) 5875f757f3fSDimitry Andric return false; // Extending to a scalar s128 needs narrowing. 5885f757f3fSDimitry Andric 5895ffd83dbSDimitry Andric const LLT &SrcTy = Query.Types[1]; 5905ffd83dbSDimitry Andric 5915ffd83dbSDimitry Andric // Make sure we fit in a register otherwise. 
Don't bother checking that 5925ffd83dbSDimitry Andric // the source type is below 128 bits. We shouldn't be allowing anything 5935ffd83dbSDimitry Andric // through which is wider than the destination in the first place. 5945ffd83dbSDimitry Andric unsigned SrcSize = SrcTy.getSizeInBits(); 5955ffd83dbSDimitry Andric if (SrcSize < 8 || !isPowerOf2_32(SrcSize)) 5965ffd83dbSDimitry Andric return false; 5975ffd83dbSDimitry Andric 5985ffd83dbSDimitry Andric return true; 5995ffd83dbSDimitry Andric }; 6005ffd83dbSDimitry Andric getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}) 6015ffd83dbSDimitry Andric .legalIf(ExtLegalFunc) 6025f757f3fSDimitry Andric .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}}) 6035f757f3fSDimitry Andric .clampScalar(0, s64, s64) // Just for s128, others are handled above. 604*0fca6ea1SDimitry Andric .moreElementsToNextPow2(0) 6055f757f3fSDimitry Andric .clampMaxNumElements(1, s8, 8) 6065f757f3fSDimitry Andric .clampMaxNumElements(1, s16, 4) 6075f757f3fSDimitry Andric .clampMaxNumElements(1, s32, 2) 6085f757f3fSDimitry Andric // Tries to convert a large EXTEND into two smaller EXTENDs 6095f757f3fSDimitry Andric .lowerIf([=](const LegalityQuery &Query) { 6105f757f3fSDimitry Andric return (Query.Types[0].getScalarSizeInBits() > 6115f757f3fSDimitry Andric Query.Types[1].getScalarSizeInBits() * 2) && 6125f757f3fSDimitry Andric Query.Types[0].isVector() && 6135f757f3fSDimitry Andric (Query.Types[1].getScalarSizeInBits() == 8 || 6145f757f3fSDimitry Andric Query.Types[1].getScalarSizeInBits() == 16); 615*0fca6ea1SDimitry Andric }) 616*0fca6ea1SDimitry Andric .clampMinNumElements(1, s8, 8) 617*0fca6ea1SDimitry Andric .clampMinNumElements(1, s16, 4); 6185ffd83dbSDimitry Andric 619e8d8bef9SDimitry Andric getActionDefinitionsBuilder(G_TRUNC) 6205f757f3fSDimitry Andric .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}}) 6215f757f3fSDimitry Andric .moreElementsToNextPow2(0) 6225f757f3fSDimitry Andric .clampMaxNumElements(0, s8, 8) 
6235f757f3fSDimitry Andric .clampMaxNumElements(0, s16, 4) 6245f757f3fSDimitry Andric .clampMaxNumElements(0, s32, 2) 625e8d8bef9SDimitry Andric .minScalarOrEltIf( 626e8d8bef9SDimitry Andric [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); }, 627e8d8bef9SDimitry Andric 0, s8) 6285f757f3fSDimitry Andric .lowerIf([=](const LegalityQuery &Query) { 629e8d8bef9SDimitry Andric LLT DstTy = Query.Types[0]; 630e8d8bef9SDimitry Andric LLT SrcTy = Query.Types[1]; 63174626c16SDimitry Andric return DstTy.isVector() && SrcTy.getSizeInBits() > 128 && 63274626c16SDimitry Andric DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits(); 633e8d8bef9SDimitry Andric }) 634*0fca6ea1SDimitry Andric .clampMinNumElements(0, s8, 8) 635*0fca6ea1SDimitry Andric .clampMinNumElements(0, s16, 4) 636e8d8bef9SDimitry Andric .alwaysLegal(); 6375ffd83dbSDimitry Andric 638bdd1243dSDimitry Andric getActionDefinitionsBuilder(G_SEXT_INREG) 639bdd1243dSDimitry Andric .legalFor({s32, s64}) 640bdd1243dSDimitry Andric .legalFor(PackedVectorAllTypeList) 6415f757f3fSDimitry Andric .maxScalar(0, s64) 642647cbc5dSDimitry Andric .clampNumElements(0, v8s8, v16s8) 643647cbc5dSDimitry Andric .clampNumElements(0, v4s16, v8s16) 644647cbc5dSDimitry Andric .clampNumElements(0, v2s32, v4s32) 645647cbc5dSDimitry Andric .clampMaxNumElements(0, s64, 2) 646bdd1243dSDimitry Andric .lower(); 6475ffd83dbSDimitry Andric 6485ffd83dbSDimitry Andric // FP conversions 649e8d8bef9SDimitry Andric getActionDefinitionsBuilder(G_FPTRUNC) 650e8d8bef9SDimitry Andric .legalFor( 651e8d8bef9SDimitry Andric {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}) 652*0fca6ea1SDimitry Andric .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}}) 65306c3fb27SDimitry Andric .clampNumElements(0, v4s16, v4s16) 65406c3fb27SDimitry Andric .clampNumElements(0, v2s32, v2s32) 65506c3fb27SDimitry Andric .scalarize(0); 65606c3fb27SDimitry Andric 657e8d8bef9SDimitry Andric getActionDefinitionsBuilder(G_FPEXT) 
658e8d8bef9SDimitry Andric .legalFor( 659e8d8bef9SDimitry Andric {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}) 660*0fca6ea1SDimitry Andric .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}}) 66106c3fb27SDimitry Andric .clampNumElements(0, v4s32, v4s32) 66206c3fb27SDimitry Andric .clampNumElements(0, v2s64, v2s64) 66306c3fb27SDimitry Andric .scalarize(0); 6645ffd83dbSDimitry Andric 6655ffd83dbSDimitry Andric // Conversions 6665ffd83dbSDimitry Andric getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) 667*0fca6ea1SDimitry Andric .legalFor({{s32, s32}, 668*0fca6ea1SDimitry Andric {s64, s32}, 669*0fca6ea1SDimitry Andric {s32, s64}, 670*0fca6ea1SDimitry Andric {s64, s64}, 671*0fca6ea1SDimitry Andric {v2s64, v2s64}, 672*0fca6ea1SDimitry Andric {v4s32, v4s32}, 673*0fca6ea1SDimitry Andric {v2s32, v2s32}}) 6745f757f3fSDimitry Andric .legalIf([=](const LegalityQuery &Query) { 6755f757f3fSDimitry Andric return HasFP16 && 6765f757f3fSDimitry Andric (Query.Types[1] == s16 || Query.Types[1] == v4s16 || 6775f757f3fSDimitry Andric Query.Types[1] == v8s16) && 6785f757f3fSDimitry Andric (Query.Types[0] == s32 || Query.Types[0] == s64 || 6795f757f3fSDimitry Andric Query.Types[0] == v4s16 || Query.Types[0] == v8s16); 6805f757f3fSDimitry Andric }) 681*0fca6ea1SDimitry Andric .scalarizeIf(scalarOrEltWiderThan(0, 64), 0) 682*0fca6ea1SDimitry Andric .scalarizeIf(scalarOrEltWiderThan(1, 64), 1) 683*0fca6ea1SDimitry Andric // The range of a fp16 value fits into an i17, so we can lower the width 684*0fca6ea1SDimitry Andric // to i64. 
685*0fca6ea1SDimitry Andric .narrowScalarIf( 686*0fca6ea1SDimitry Andric [=](const LegalityQuery &Query) { 687*0fca6ea1SDimitry Andric return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64; 688*0fca6ea1SDimitry Andric }, 689*0fca6ea1SDimitry Andric changeTo(0, s64)) 6905f757f3fSDimitry Andric .moreElementsToNextPow2(0) 691*0fca6ea1SDimitry Andric .widenScalarOrEltToNextPow2OrMinSize(0) 692*0fca6ea1SDimitry Andric .minScalar(0, s32) 693*0fca6ea1SDimitry Andric .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32) 6945f757f3fSDimitry Andric .widenScalarIf( 6955f757f3fSDimitry Andric [=](const LegalityQuery &Query) { 696*0fca6ea1SDimitry Andric return Query.Types[0].getScalarSizeInBits() <= 64 && 697*0fca6ea1SDimitry Andric Query.Types[0].getScalarSizeInBits() > 6985f757f3fSDimitry Andric Query.Types[1].getScalarSizeInBits(); 6995f757f3fSDimitry Andric }, 7005f757f3fSDimitry Andric LegalizeMutations::changeElementSizeTo(1, 0)) 7015f757f3fSDimitry Andric .widenScalarIf( 7025f757f3fSDimitry Andric [=](const LegalityQuery &Query) { 703*0fca6ea1SDimitry Andric return Query.Types[1].getScalarSizeInBits() <= 64 && 704*0fca6ea1SDimitry Andric Query.Types[0].getScalarSizeInBits() < 7055f757f3fSDimitry Andric Query.Types[1].getScalarSizeInBits(); 7065f757f3fSDimitry Andric }, 7075f757f3fSDimitry Andric LegalizeMutations::changeElementSizeTo(0, 1)) 7085f757f3fSDimitry Andric .clampNumElements(0, v4s16, v8s16) 7095f757f3fSDimitry Andric .clampNumElements(0, v2s32, v4s32) 710*0fca6ea1SDimitry Andric .clampMaxNumElements(0, s64, 2) 711*0fca6ea1SDimitry Andric .libcallFor( 712*0fca6ea1SDimitry Andric {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}}); 7135ffd83dbSDimitry Andric 7145ffd83dbSDimitry Andric getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) 715*0fca6ea1SDimitry Andric .legalFor({{s32, s32}, 716*0fca6ea1SDimitry Andric {s64, s32}, 717*0fca6ea1SDimitry Andric {s32, s64}, 718*0fca6ea1SDimitry Andric {s64, s64}, 
719*0fca6ea1SDimitry Andric {v2s64, v2s64}, 720*0fca6ea1SDimitry Andric {v4s32, v4s32}, 721*0fca6ea1SDimitry Andric {v2s32, v2s32}}) 7225f757f3fSDimitry Andric .legalIf([=](const LegalityQuery &Query) { 7235f757f3fSDimitry Andric return HasFP16 && 7245f757f3fSDimitry Andric (Query.Types[0] == s16 || Query.Types[0] == v4s16 || 7255f757f3fSDimitry Andric Query.Types[0] == v8s16) && 7265f757f3fSDimitry Andric (Query.Types[1] == s32 || Query.Types[1] == s64 || 7275f757f3fSDimitry Andric Query.Types[1] == v4s16 || Query.Types[1] == v8s16); 7285f757f3fSDimitry Andric }) 729*0fca6ea1SDimitry Andric .scalarizeIf(scalarOrEltWiderThan(1, 64), 1) 730*0fca6ea1SDimitry Andric .scalarizeIf(scalarOrEltWiderThan(0, 64), 0) 731*0fca6ea1SDimitry Andric .moreElementsToNextPow2(1) 732*0fca6ea1SDimitry Andric .widenScalarOrEltToNextPow2OrMinSize(1) 733*0fca6ea1SDimitry Andric .minScalar(1, s32) 734*0fca6ea1SDimitry Andric .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32) 7355f757f3fSDimitry Andric .widenScalarIf( 7365f757f3fSDimitry Andric [=](const LegalityQuery &Query) { 737*0fca6ea1SDimitry Andric return Query.Types[1].getScalarSizeInBits() <= 64 && 738*0fca6ea1SDimitry Andric Query.Types[0].getScalarSizeInBits() < 7395f757f3fSDimitry Andric Query.Types[1].getScalarSizeInBits(); 7405f757f3fSDimitry Andric }, 7415f757f3fSDimitry Andric LegalizeMutations::changeElementSizeTo(0, 1)) 7425f757f3fSDimitry Andric .widenScalarIf( 7435f757f3fSDimitry Andric [=](const LegalityQuery &Query) { 744*0fca6ea1SDimitry Andric return Query.Types[0].getScalarSizeInBits() <= 64 && 745*0fca6ea1SDimitry Andric Query.Types[0].getScalarSizeInBits() > 7465f757f3fSDimitry Andric Query.Types[1].getScalarSizeInBits(); 7475f757f3fSDimitry Andric }, 7485f757f3fSDimitry Andric LegalizeMutations::changeElementSizeTo(1, 0)) 7495f757f3fSDimitry Andric .clampNumElements(0, v4s16, v8s16) 7505f757f3fSDimitry Andric .clampNumElements(0, v2s32, v4s32) 751*0fca6ea1SDimitry Andric 
.clampMaxNumElements(0, s64, 2) 752*0fca6ea1SDimitry Andric .libcallFor({{s16, s128}, 753*0fca6ea1SDimitry Andric {s32, s128}, 754*0fca6ea1SDimitry Andric {s64, s128}, 755*0fca6ea1SDimitry Andric {s128, s128}, 756*0fca6ea1SDimitry Andric {s128, s32}, 757*0fca6ea1SDimitry Andric {s128, s64}}); 7585ffd83dbSDimitry Andric 7595ffd83dbSDimitry Andric // Control-flow 760753f127fSDimitry Andric getActionDefinitionsBuilder(G_BRCOND) 761753f127fSDimitry Andric .legalFor({s32}) 762753f127fSDimitry Andric .clampScalar(0, s32, s32); 7635ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0}); 7645ffd83dbSDimitry Andric 7655ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_SELECT) 766753f127fSDimitry Andric .legalFor({{s32, s32}, {s64, s32}, {p0, s32}}) 7675ffd83dbSDimitry Andric .widenScalarToNextPow2(0) 768349cc55cSDimitry Andric .clampScalar(0, s32, s64) 769753f127fSDimitry Andric .clampScalar(1, s32, s32) 770e8d8bef9SDimitry Andric .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0) 771e8d8bef9SDimitry Andric .lowerIf(isVector(0)); 7725ffd83dbSDimitry Andric 7735ffd83dbSDimitry Andric // Pointer-handling 7745ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); 7755ffd83dbSDimitry Andric 7765ffd83dbSDimitry Andric if (TM.getCodeModel() == CodeModel::Small) 7775ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom(); 7785ffd83dbSDimitry Andric else 7795ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0}); 7805ffd83dbSDimitry Andric 781*0fca6ea1SDimitry Andric getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE) 782*0fca6ea1SDimitry Andric .legalIf(all(typeIs(0, p0), typeIs(1, p0))); 783*0fca6ea1SDimitry Andric 7845ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_PTRTOINT) 785bdd1243dSDimitry Andric .legalFor({{s64, p0}, {v2s64, v2p0}}) 786bdd1243dSDimitry Andric .widenScalarToNextPow2(0, 64) 787bdd1243dSDimitry Andric .clampScalar(0, s64, s64); 
7885ffd83dbSDimitry Andric 7895ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_INTTOPTR) 7905ffd83dbSDimitry Andric .unsupportedIf([&](const LegalityQuery &Query) { 7915ffd83dbSDimitry Andric return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits(); 7925ffd83dbSDimitry Andric }) 793fe6060f1SDimitry Andric .legalFor({{p0, s64}, {v2p0, v2s64}}); 7945ffd83dbSDimitry Andric 7955ffd83dbSDimitry Andric // Casts for 32 and 64-bit width type are just copies. 7965ffd83dbSDimitry Andric // Same for 128-bit width type, except they are on the FPR bank. 7975ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_BITCAST) 798*0fca6ea1SDimitry Andric // Keeping 32-bit instructions legal to prevent regression in some tests 799*0fca6ea1SDimitry Andric .legalForCartesianProduct({s32, v2s16, v4s8}) 800*0fca6ea1SDimitry Andric .legalForCartesianProduct({s64, v8s8, v4s16, v2s32}) 801*0fca6ea1SDimitry Andric .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0}) 802*0fca6ea1SDimitry Andric .lowerIf([=](const LegalityQuery &Query) { 803*0fca6ea1SDimitry Andric return Query.Types[0].isVector() != Query.Types[1].isVector(); 804*0fca6ea1SDimitry Andric }) 805*0fca6ea1SDimitry Andric .moreElementsToNextPow2(0) 806*0fca6ea1SDimitry Andric .clampNumElements(0, v8s8, v16s8) 807*0fca6ea1SDimitry Andric .clampNumElements(0, v4s16, v8s16) 808*0fca6ea1SDimitry Andric .clampNumElements(0, v2s32, v4s32) 809*0fca6ea1SDimitry Andric .lower(); 8105ffd83dbSDimitry Andric 8115ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_VASTART).legalFor({p0}); 8125ffd83dbSDimitry Andric 8135ffd83dbSDimitry Andric // va_list must be a pointer, but most sized types are pretty easy to handle 8145ffd83dbSDimitry Andric // as the destination. 
8155ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_VAARG) 8165ffd83dbSDimitry Andric .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0}) 8175ffd83dbSDimitry Andric .clampScalar(0, s8, s64) 8185ffd83dbSDimitry Andric .widenScalarToNextPow2(0, /*Min*/ 8); 8195ffd83dbSDimitry Andric 8205ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS) 821fe6060f1SDimitry Andric .lowerIf( 82281ad6265SDimitry Andric all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0))); 823fe6060f1SDimitry Andric 8241db9f3b2SDimitry Andric LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) { 8251db9f3b2SDimitry Andric return ST.outlineAtomics() && !ST.hasLSE(); 8261db9f3b2SDimitry Andric }; 8271db9f3b2SDimitry Andric 828fe6060f1SDimitry Andric getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG) 8291db9f3b2SDimitry Andric .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0), 8301db9f3b2SDimitry Andric predNot(UseOutlineAtomics))) 8311db9f3b2SDimitry Andric .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics))) 8321db9f3b2SDimitry Andric .customIf([UseOutlineAtomics](const LegalityQuery &Query) { 8331db9f3b2SDimitry Andric return Query.Types[0].getSizeInBits() == 128 && 8341db9f3b2SDimitry Andric !UseOutlineAtomics(Query); 835fe6060f1SDimitry Andric }) 8361db9f3b2SDimitry Andric .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0), 8371db9f3b2SDimitry Andric UseOutlineAtomics)) 8385f757f3fSDimitry Andric .clampScalar(0, s32, s64); 8395ffd83dbSDimitry Andric 8401db9f3b2SDimitry Andric getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, 8411db9f3b2SDimitry Andric G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR, 8421db9f3b2SDimitry Andric G_ATOMICRMW_XOR}) 8431db9f3b2SDimitry Andric .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0), 8441db9f3b2SDimitry Andric predNot(UseOutlineAtomics))) 8451db9f3b2SDimitry Andric .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0), 8461db9f3b2SDimitry Andric 
UseOutlineAtomics)) 8471db9f3b2SDimitry Andric .clampScalar(0, s32, s64); 8481db9f3b2SDimitry Andric 8491db9f3b2SDimitry Andric // Do not outline these atomics operations, as per comment in 8501db9f3b2SDimitry Andric // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR(). 8515ffd83dbSDimitry Andric getActionDefinitionsBuilder( 8521db9f3b2SDimitry Andric {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX}) 8535f757f3fSDimitry Andric .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0))) 8545f757f3fSDimitry Andric .clampScalar(0, s32, s64); 8555ffd83dbSDimitry Andric 8565ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0}); 8575ffd83dbSDimitry Andric 8585ffd83dbSDimitry Andric // Merge/Unmerge 8595ffd83dbSDimitry Andric for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { 8605ffd83dbSDimitry Andric unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; 8615ffd83dbSDimitry Andric unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0; 8625ffd83dbSDimitry Andric getActionDefinitionsBuilder(Op) 863349cc55cSDimitry Andric .widenScalarToNextPow2(LitTyIdx, 8) 864349cc55cSDimitry Andric .widenScalarToNextPow2(BigTyIdx, 32) 865349cc55cSDimitry Andric .clampScalar(LitTyIdx, s8, s64) 866349cc55cSDimitry Andric .clampScalar(BigTyIdx, s32, s128) 867349cc55cSDimitry Andric .legalIf([=](const LegalityQuery &Q) { 868349cc55cSDimitry Andric switch (Q.Types[BigTyIdx].getSizeInBits()) { 869349cc55cSDimitry Andric case 32: 870349cc55cSDimitry Andric case 64: 871349cc55cSDimitry Andric case 128: 872349cc55cSDimitry Andric break; 873349cc55cSDimitry Andric default: 874349cc55cSDimitry Andric return false; 8755ffd83dbSDimitry Andric } 876349cc55cSDimitry Andric switch (Q.Types[LitTyIdx].getSizeInBits()) { 877349cc55cSDimitry Andric case 8: 878349cc55cSDimitry Andric case 16: 879349cc55cSDimitry Andric case 32: 880349cc55cSDimitry Andric case 64: 881349cc55cSDimitry Andric return true; 882349cc55cSDimitry Andric default: 8835ffd83dbSDimitry Andric 
return false; 884349cc55cSDimitry Andric } 885349cc55cSDimitry Andric }); 8865ffd83dbSDimitry Andric } 8875ffd83dbSDimitry Andric 8885ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT) 8895ffd83dbSDimitry Andric .unsupportedIf([=](const LegalityQuery &Query) { 8905ffd83dbSDimitry Andric const LLT &EltTy = Query.Types[1].getElementType(); 8915ffd83dbSDimitry Andric return Query.Types[0] != EltTy; 8925ffd83dbSDimitry Andric }) 8935ffd83dbSDimitry Andric .minScalar(2, s64) 8945f757f3fSDimitry Andric .customIf([=](const LegalityQuery &Query) { 8955ffd83dbSDimitry Andric const LLT &VecTy = Query.Types[1]; 8965ffd83dbSDimitry Andric return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 || 897e8d8bef9SDimitry Andric VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 || 8985f757f3fSDimitry Andric VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0; 899e8d8bef9SDimitry Andric }) 900e8d8bef9SDimitry Andric .minScalarOrEltIf( 901e8d8bef9SDimitry Andric [=](const LegalityQuery &Query) { 902e8d8bef9SDimitry Andric // We want to promote to <M x s1> to <M x s64> if that wouldn't 903e8d8bef9SDimitry Andric // cause the total vec size to be > 128b. 
904e8d8bef9SDimitry Andric return Query.Types[1].getNumElements() <= 2; 905e8d8bef9SDimitry Andric }, 906e8d8bef9SDimitry Andric 0, s64) 907e8d8bef9SDimitry Andric .minScalarOrEltIf( 908e8d8bef9SDimitry Andric [=](const LegalityQuery &Query) { 909e8d8bef9SDimitry Andric return Query.Types[1].getNumElements() <= 4; 910e8d8bef9SDimitry Andric }, 911e8d8bef9SDimitry Andric 0, s32) 912e8d8bef9SDimitry Andric .minScalarOrEltIf( 913e8d8bef9SDimitry Andric [=](const LegalityQuery &Query) { 914e8d8bef9SDimitry Andric return Query.Types[1].getNumElements() <= 8; 915e8d8bef9SDimitry Andric }, 916e8d8bef9SDimitry Andric 0, s16) 917e8d8bef9SDimitry Andric .minScalarOrEltIf( 918e8d8bef9SDimitry Andric [=](const LegalityQuery &Query) { 919e8d8bef9SDimitry Andric return Query.Types[1].getNumElements() <= 16; 920e8d8bef9SDimitry Andric }, 921e8d8bef9SDimitry Andric 0, s8) 922fe6060f1SDimitry Andric .minScalarOrElt(0, s8) // Worst case, we need at least s8. 9237a6dacacSDimitry Andric .moreElementsToNextPow2(1) 924fe6060f1SDimitry Andric .clampMaxNumElements(1, s64, 2) 925fe6060f1SDimitry Andric .clampMaxNumElements(1, s32, 4) 926fe6060f1SDimitry Andric .clampMaxNumElements(1, s16, 8) 927*0fca6ea1SDimitry Andric .clampMaxNumElements(1, s8, 16) 928fe6060f1SDimitry Andric .clampMaxNumElements(1, p0, 2); 9295ffd83dbSDimitry Andric 9305ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) 931*0fca6ea1SDimitry Andric .legalIf( 932*0fca6ea1SDimitry Andric typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0})) 9333a079333SDimitry Andric .moreElementsToNextPow2(0) 934*0fca6ea1SDimitry Andric .widenVectorEltsToVectorMinSize(0, 64) 935*0fca6ea1SDimitry Andric .clampNumElements(0, v8s8, v16s8) 936*0fca6ea1SDimitry Andric .clampNumElements(0, v4s16, v8s16) 937*0fca6ea1SDimitry Andric .clampNumElements(0, v2s32, v4s32) 938*0fca6ea1SDimitry Andric .clampMaxNumElements(0, s64, 2) 939*0fca6ea1SDimitry Andric .clampMaxNumElements(0, p0, 2); 9405ffd83dbSDimitry Andric 
9415ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_BUILD_VECTOR) 942e8d8bef9SDimitry Andric .legalFor({{v8s8, s8}, 943e8d8bef9SDimitry Andric {v16s8, s8}, 944e8d8bef9SDimitry Andric {v4s16, s16}, 9455ffd83dbSDimitry Andric {v8s16, s16}, 9465ffd83dbSDimitry Andric {v2s32, s32}, 9475ffd83dbSDimitry Andric {v4s32, s32}, 9485ffd83dbSDimitry Andric {v2p0, p0}, 9495ffd83dbSDimitry Andric {v2s64, s64}}) 9505ffd83dbSDimitry Andric .clampNumElements(0, v4s32, v4s32) 9515ffd83dbSDimitry Andric .clampNumElements(0, v2s64, v2s64) 952349cc55cSDimitry Andric .minScalarOrElt(0, s8) 95306c3fb27SDimitry Andric .widenVectorEltsToVectorMinSize(0, 64) 9545ffd83dbSDimitry Andric .minScalarSameAs(1, 0); 9555ffd83dbSDimitry Andric 956fe6060f1SDimitry Andric getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower(); 957fe6060f1SDimitry Andric 958e8d8bef9SDimitry Andric getActionDefinitionsBuilder(G_CTLZ) 959e8d8bef9SDimitry Andric .legalForCartesianProduct( 9605ffd83dbSDimitry Andric {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) 96106c3fb27SDimitry Andric .scalarize(1) 96206c3fb27SDimitry Andric .widenScalarToNextPow2(1, /*Min=*/32) 96306c3fb27SDimitry Andric .clampScalar(1, s32, s64) 96406c3fb27SDimitry Andric .scalarSameSizeAs(0, 1); 965fe6060f1SDimitry Andric getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower(); 966fe6060f1SDimitry Andric 967fe6060f1SDimitry Andric // TODO: Custom lowering for v2s32, v4s32, v2s64. 
968349cc55cSDimitry Andric getActionDefinitionsBuilder(G_BITREVERSE) 969349cc55cSDimitry Andric .legalFor({s32, s64, v8s8, v16s8}) 970349cc55cSDimitry Andric .widenScalarToNextPow2(0, /*Min = */ 32) 971349cc55cSDimitry Andric .clampScalar(0, s32, s64); 972fe6060f1SDimitry Andric 973fe6060f1SDimitry Andric getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower(); 974fe6060f1SDimitry Andric 975fe6060f1SDimitry Andric getActionDefinitionsBuilder(G_CTTZ) 97661cfbce3SDimitry Andric .lowerIf(isVector(0)) 97706c3fb27SDimitry Andric .widenScalarToNextPow2(1, /*Min=*/32) 97806c3fb27SDimitry Andric .clampScalar(1, s32, s64) 97906c3fb27SDimitry Andric .scalarSameSizeAs(0, 1) 980bdd1243dSDimitry Andric .legalIf([=](const LegalityQuery &Query) { 981bdd1243dSDimitry Andric return (HasCSSC && typeInSet(0, {s32, s64})(Query)); 982bdd1243dSDimitry Andric }) 983bdd1243dSDimitry Andric .customIf([=](const LegalityQuery &Query) { 984bdd1243dSDimitry Andric return (!HasCSSC && typeInSet(0, {s32, s64})(Query)); 985bdd1243dSDimitry Andric }); 9865ffd83dbSDimitry Andric 9875ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_SHUFFLE_VECTOR) 9885ffd83dbSDimitry Andric .legalIf([=](const LegalityQuery &Query) { 9895ffd83dbSDimitry Andric const LLT &DstTy = Query.Types[0]; 9905ffd83dbSDimitry Andric const LLT &SrcTy = Query.Types[1]; 9915ffd83dbSDimitry Andric // For now just support the TBL2 variant which needs the source vectors 9925ffd83dbSDimitry Andric // to be the same size as the dest. 
9935ffd83dbSDimitry Andric if (DstTy != SrcTy) 9945ffd83dbSDimitry Andric return false; 9955f757f3fSDimitry Andric return llvm::is_contained( 9965f757f3fSDimitry Andric {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy); 9975ffd83dbSDimitry Andric }) 9985ffd83dbSDimitry Andric // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we 9995ffd83dbSDimitry Andric // just want those lowered into G_BUILD_VECTOR 10005ffd83dbSDimitry Andric .lowerIf([=](const LegalityQuery &Query) { 10015ffd83dbSDimitry Andric return !Query.Types[1].isVector(); 10025ffd83dbSDimitry Andric }) 1003bdd1243dSDimitry Andric .moreElementsIf( 1004bdd1243dSDimitry Andric [](const LegalityQuery &Query) { 1005bdd1243dSDimitry Andric return Query.Types[0].isVector() && Query.Types[1].isVector() && 1006bdd1243dSDimitry Andric Query.Types[0].getNumElements() > 1007bdd1243dSDimitry Andric Query.Types[1].getNumElements(); 1008bdd1243dSDimitry Andric }, 1009bdd1243dSDimitry Andric changeTo(1, 0)) 1010fe6060f1SDimitry Andric .moreElementsToNextPow2(0) 101106c3fb27SDimitry Andric .moreElementsIf( 101206c3fb27SDimitry Andric [](const LegalityQuery &Query) { 101306c3fb27SDimitry Andric return Query.Types[0].isVector() && Query.Types[1].isVector() && 101406c3fb27SDimitry Andric Query.Types[0].getNumElements() < 101506c3fb27SDimitry Andric Query.Types[1].getNumElements(); 101606c3fb27SDimitry Andric }, 1017*0fca6ea1SDimitry Andric changeTo(0, 1)) 1018*0fca6ea1SDimitry Andric .widenScalarOrEltToNextPow2OrMinSize(0, 8) 1019*0fca6ea1SDimitry Andric .clampNumElements(0, v8s8, v16s8) 1020*0fca6ea1SDimitry Andric .clampNumElements(0, v4s16, v8s16) 1021*0fca6ea1SDimitry Andric .clampNumElements(0, v4s32, v4s32) 1022*0fca6ea1SDimitry Andric .clampNumElements(0, v2s64, v2s64); 10235ffd83dbSDimitry Andric 10245ffd83dbSDimitry Andric getActionDefinitionsBuilder(G_CONCAT_VECTORS) 1025*0fca6ea1SDimitry Andric .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}}) 1026*0fca6ea1SDimitry Andric 
.bitcastIf( 1027*0fca6ea1SDimitry Andric [=](const LegalityQuery &Query) { 1028*0fca6ea1SDimitry Andric return Query.Types[0].getSizeInBits() <= 128 && 1029*0fca6ea1SDimitry Andric Query.Types[1].getSizeInBits() <= 64; 1030*0fca6ea1SDimitry Andric }, 1031*0fca6ea1SDimitry Andric [=](const LegalityQuery &Query) { 1032*0fca6ea1SDimitry Andric const LLT DstTy = Query.Types[0]; 1033*0fca6ea1SDimitry Andric const LLT SrcTy = Query.Types[1]; 1034*0fca6ea1SDimitry Andric return std::pair( 1035*0fca6ea1SDimitry Andric 0, DstTy.changeElementSize(SrcTy.getSizeInBits()) 1036*0fca6ea1SDimitry Andric .changeElementCount( 1037*0fca6ea1SDimitry Andric DstTy.getElementCount().divideCoefficientBy( 1038*0fca6ea1SDimitry Andric SrcTy.getNumElements()))); 1039*0fca6ea1SDimitry Andric }); 10405ffd83dbSDimitry Andric 10415f757f3fSDimitry Andric getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0}); 10425ffd83dbSDimitry Andric 10435f757f3fSDimitry Andric getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}}); 10445ffd83dbSDimitry Andric 10455f757f3fSDimitry Andric getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom(); 10465f757f3fSDimitry Andric 10475f757f3fSDimitry Andric getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower(); 10485ffd83dbSDimitry Andric 10491fd87a68SDimitry Andric if (ST.hasMOPS()) { 10501fd87a68SDimitry Andric // G_BZERO is not supported. Currently it is only emitted by 10511fd87a68SDimitry Andric // PreLegalizerCombiner for G_MEMSET with zero constant. 10521fd87a68SDimitry Andric getActionDefinitionsBuilder(G_BZERO).unsupported(); 10531fd87a68SDimitry Andric 10541fd87a68SDimitry Andric getActionDefinitionsBuilder(G_MEMSET) 10551fd87a68SDimitry Andric .legalForCartesianProduct({p0}, {s64}, {s64}) 10561fd87a68SDimitry Andric .customForCartesianProduct({p0}, {s8}, {s64}) 10571fd87a68SDimitry Andric .immIdx(0); // Inform verifier imm idx 0 is handled. 
10581fd87a68SDimitry Andric 10591fd87a68SDimitry Andric getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE}) 10601fd87a68SDimitry Andric .legalForCartesianProduct({p0}, {p0}, {s64}) 10611fd87a68SDimitry Andric .immIdx(0); // Inform verifier imm idx 0 is handled. 10621fd87a68SDimitry Andric 10631fd87a68SDimitry Andric // G_MEMCPY_INLINE does not have a tailcall immediate 10641fd87a68SDimitry Andric getActionDefinitionsBuilder(G_MEMCPY_INLINE) 10651fd87a68SDimitry Andric .legalForCartesianProduct({p0}, {p0}, {s64}); 10661fd87a68SDimitry Andric 10671fd87a68SDimitry Andric } else { 1068fe6060f1SDimitry Andric getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET}) 1069fe6060f1SDimitry Andric .libcall(); 10701fd87a68SDimitry Andric } 1071e8d8bef9SDimitry Andric 1072bdd1243dSDimitry Andric // FIXME: Legal vector types are only legal with NEON. 1073bdd1243dSDimitry Andric auto &ABSActions = getActionDefinitionsBuilder(G_ABS); 1074bdd1243dSDimitry Andric if (HasCSSC) 1075bdd1243dSDimitry Andric ABSActions 1076bdd1243dSDimitry Andric .legalFor({s32, s64}); 1077*0fca6ea1SDimitry Andric ABSActions.legalFor(PackedVectorAllTypeList) 1078*0fca6ea1SDimitry Andric .customIf([=](const LegalityQuery &Q) { 1079*0fca6ea1SDimitry Andric // TODO: Fix suboptimal codegen for 128+ bit types. 
1080*0fca6ea1SDimitry Andric LLT SrcTy = Q.Types[0]; 1081*0fca6ea1SDimitry Andric return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128; 1082*0fca6ea1SDimitry Andric }) 1083*0fca6ea1SDimitry Andric .widenScalarIf( 1084*0fca6ea1SDimitry Andric [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; }, 1085*0fca6ea1SDimitry Andric [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); }) 1086*0fca6ea1SDimitry Andric .widenScalarIf( 1087*0fca6ea1SDimitry Andric [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; }, 1088*0fca6ea1SDimitry Andric [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); }) 1089*0fca6ea1SDimitry Andric .clampNumElements(0, v8s8, v16s8) 1090*0fca6ea1SDimitry Andric .clampNumElements(0, v4s16, v8s16) 1091*0fca6ea1SDimitry Andric .clampNumElements(0, v2s32, v4s32) 1092*0fca6ea1SDimitry Andric .clampNumElements(0, v2s64, v2s64) 1093*0fca6ea1SDimitry Andric .moreElementsToNextPow2(0) 1094*0fca6ea1SDimitry Andric .lower(); 1095e8d8bef9SDimitry Andric 10965f757f3fSDimitry Andric // For fadd reductions we have pairwise operations available. We treat the 10975f757f3fSDimitry Andric // usual legal types as legal and handle the lowering to pairwise instructions 10985f757f3fSDimitry Andric // later. 
1099e8d8bef9SDimitry Andric getActionDefinitionsBuilder(G_VECREDUCE_FADD) 11005f757f3fSDimitry Andric .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}}) 11015f757f3fSDimitry Andric .legalIf([=](const LegalityQuery &Query) { 11025f757f3fSDimitry Andric const auto &Ty = Query.Types[1]; 11035f757f3fSDimitry Andric return (Ty == v4s16 || Ty == v8s16) && HasFP16; 11045f757f3fSDimitry Andric }) 11055f757f3fSDimitry Andric .minScalarOrElt(0, MinFPScalar) 1106fe6060f1SDimitry Andric .clampMaxNumElements(1, s64, 2) 11075f757f3fSDimitry Andric .clampMaxNumElements(1, s32, 4) 11085f757f3fSDimitry Andric .clampMaxNumElements(1, s16, 8) 1109e8d8bef9SDimitry Andric .lower(); 1110e8d8bef9SDimitry Andric 11111db9f3b2SDimitry Andric // For fmul reductions we need to split up into individual operations. We 11121db9f3b2SDimitry Andric // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of 11131db9f3b2SDimitry Andric // smaller types, followed by scalarizing what remains. 11141db9f3b2SDimitry Andric getActionDefinitionsBuilder(G_VECREDUCE_FMUL) 11151db9f3b2SDimitry Andric .minScalarOrElt(0, MinFPScalar) 11161db9f3b2SDimitry Andric .clampMaxNumElements(1, s64, 2) 11171db9f3b2SDimitry Andric .clampMaxNumElements(1, s32, 4) 11181db9f3b2SDimitry Andric .clampMaxNumElements(1, s16, 8) 11191db9f3b2SDimitry Andric .clampMaxNumElements(1, s32, 2) 11201db9f3b2SDimitry Andric .clampMaxNumElements(1, s16, 4) 11211db9f3b2SDimitry Andric .scalarize(1) 11221db9f3b2SDimitry Andric .lower(); 11231db9f3b2SDimitry Andric 11241db9f3b2SDimitry Andric getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL}) 11251db9f3b2SDimitry Andric .scalarize(2) 11261db9f3b2SDimitry Andric .lower(); 11271db9f3b2SDimitry Andric 1128e8d8bef9SDimitry Andric getActionDefinitionsBuilder(G_VECREDUCE_ADD) 11295f757f3fSDimitry Andric .legalFor({{s8, v16s8}, 11305f757f3fSDimitry Andric {s8, v8s8}, 11315f757f3fSDimitry Andric {s16, v8s16}, 11325f757f3fSDimitry Andric {s16, v4s16}, 
11335f757f3fSDimitry Andric {s32, v4s32}, 11345f757f3fSDimitry Andric {s32, v2s32}, 11355f757f3fSDimitry Andric {s64, v2s64}}) 1136fe6060f1SDimitry Andric .clampMaxNumElements(1, s64, 2) 1137fe6060f1SDimitry Andric .clampMaxNumElements(1, s32, 4) 11385f757f3fSDimitry Andric .clampMaxNumElements(1, s16, 8) 11395f757f3fSDimitry Andric .clampMaxNumElements(1, s8, 16) 11405f757f3fSDimitry Andric .lower(); 11415f757f3fSDimitry Andric 11425f757f3fSDimitry Andric getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX, 11435f757f3fSDimitry Andric G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM}) 11445f757f3fSDimitry Andric .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}}) 11455f757f3fSDimitry Andric .legalIf([=](const LegalityQuery &Query) { 11465f757f3fSDimitry Andric const auto &Ty = Query.Types[1]; 11475f757f3fSDimitry Andric return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16; 11485f757f3fSDimitry Andric }) 11495f757f3fSDimitry Andric .minScalarOrElt(0, MinFPScalar) 11505f757f3fSDimitry Andric .clampMaxNumElements(1, s64, 2) 11515f757f3fSDimitry Andric .clampMaxNumElements(1, s32, 4) 11525f757f3fSDimitry Andric .clampMaxNumElements(1, s16, 8) 11535f757f3fSDimitry Andric .lower(); 11545f757f3fSDimitry Andric 11555f757f3fSDimitry Andric getActionDefinitionsBuilder(G_VECREDUCE_MUL) 11565f757f3fSDimitry Andric .clampMaxNumElements(1, s32, 2) 11575f757f3fSDimitry Andric .clampMaxNumElements(1, s16, 4) 11585f757f3fSDimitry Andric .clampMaxNumElements(1, s8, 8) 11595f757f3fSDimitry Andric .scalarize(1) 11605f757f3fSDimitry Andric .lower(); 11615f757f3fSDimitry Andric 11625f757f3fSDimitry Andric getActionDefinitionsBuilder( 11635f757f3fSDimitry Andric {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX}) 11645f757f3fSDimitry Andric .legalFor({{s8, v8s8}, 11655f757f3fSDimitry Andric {s8, v16s8}, 11665f757f3fSDimitry Andric {s16, v4s16}, 11675f757f3fSDimitry Andric {s16, v8s16}, 11685f757f3fSDimitry Andric {s32, v2s32}, 
11695f757f3fSDimitry Andric {s32, v4s32}}) 1170*0fca6ea1SDimitry Andric .moreElementsIf( 1171*0fca6ea1SDimitry Andric [=](const LegalityQuery &Query) { 1172*0fca6ea1SDimitry Andric return Query.Types[1].isVector() && 1173*0fca6ea1SDimitry Andric Query.Types[1].getElementType() != s8 && 1174*0fca6ea1SDimitry Andric Query.Types[1].getNumElements() & 1; 1175*0fca6ea1SDimitry Andric }, 1176*0fca6ea1SDimitry Andric LegalizeMutations::moreElementsToNextPow2(1)) 11775f757f3fSDimitry Andric .clampMaxNumElements(1, s64, 2) 11785f757f3fSDimitry Andric .clampMaxNumElements(1, s32, 4) 11795f757f3fSDimitry Andric .clampMaxNumElements(1, s16, 8) 11805f757f3fSDimitry Andric .clampMaxNumElements(1, s8, 16) 11815f757f3fSDimitry Andric .scalarize(1) 1182e8d8bef9SDimitry Andric .lower(); 1183e8d8bef9SDimitry Andric 1184349cc55cSDimitry Andric getActionDefinitionsBuilder( 1185349cc55cSDimitry Andric {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR}) 1186349cc55cSDimitry Andric // Try to break down into smaller vectors as long as they're at least 64 1187349cc55cSDimitry Andric // bits. This lets us use vector operations for some parts of the 1188349cc55cSDimitry Andric // reduction. 1189349cc55cSDimitry Andric .fewerElementsIf( 1190349cc55cSDimitry Andric [=](const LegalityQuery &Q) { 1191349cc55cSDimitry Andric LLT SrcTy = Q.Types[1]; 1192349cc55cSDimitry Andric if (SrcTy.isScalar()) 1193349cc55cSDimitry Andric return false; 1194349cc55cSDimitry Andric if (!isPowerOf2_32(SrcTy.getNumElements())) 1195349cc55cSDimitry Andric return false; 1196349cc55cSDimitry Andric // We can usually perform 64b vector operations. 
1197349cc55cSDimitry Andric return SrcTy.getSizeInBits() > 64; 1198349cc55cSDimitry Andric }, 1199349cc55cSDimitry Andric [=](const LegalityQuery &Q) { 1200349cc55cSDimitry Andric LLT SrcTy = Q.Types[1]; 1201349cc55cSDimitry Andric return std::make_pair(1, SrcTy.divide(2)); 1202349cc55cSDimitry Andric }) 1203349cc55cSDimitry Andric .scalarize(1) 1204349cc55cSDimitry Andric .lower(); 1205349cc55cSDimitry Andric 1206*0fca6ea1SDimitry Andric // TODO: Update this to correct handling when adding AArch64/SVE support. 1207*0fca6ea1SDimitry Andric getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower(); 1208fe6060f1SDimitry Andric 12095f757f3fSDimitry Andric getActionDefinitionsBuilder({G_FSHL, G_FSHR}) 12105f757f3fSDimitry Andric .customFor({{s32, s32}, {s32, s64}, {s64, s64}}) 12115f757f3fSDimitry Andric .lower(); 1212fe6060f1SDimitry Andric 1213fe6060f1SDimitry Andric getActionDefinitionsBuilder(G_ROTR) 1214fe6060f1SDimitry Andric .legalFor({{s32, s64}, {s64, s64}}) 1215fe6060f1SDimitry Andric .customIf([=](const LegalityQuery &Q) { 1216fe6060f1SDimitry Andric return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64; 1217fe6060f1SDimitry Andric }) 1218fe6060f1SDimitry Andric .lower(); 1219fe6060f1SDimitry Andric getActionDefinitionsBuilder(G_ROTL).lower(); 1220fe6060f1SDimitry Andric 1221fe6060f1SDimitry Andric getActionDefinitionsBuilder({G_SBFX, G_UBFX}) 1222fe6060f1SDimitry Andric .customFor({{s32, s32}, {s64, s64}}); 1223fe6060f1SDimitry Andric 1224fe6060f1SDimitry Andric auto always = [=](const LegalityQuery &Q) { return true; }; 1225bdd1243dSDimitry Andric auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP); 1226bdd1243dSDimitry Andric if (HasCSSC) 1227bdd1243dSDimitry Andric CTPOPActions 1228bdd1243dSDimitry Andric .legalFor({{s32, s32}, 1229bdd1243dSDimitry Andric {s64, s64}, 1230bdd1243dSDimitry Andric {v8s8, v8s8}, 1231bdd1243dSDimitry Andric {v16s8, v16s8}}) 1232bdd1243dSDimitry Andric .customFor({{s128, s128}, 1233bdd1243dSDimitry Andric 
{v2s64, v2s64}, 1234bdd1243dSDimitry Andric {v2s32, v2s32}, 1235bdd1243dSDimitry Andric {v4s32, v4s32}, 1236bdd1243dSDimitry Andric {v4s16, v4s16}, 1237bdd1243dSDimitry Andric {v8s16, v8s16}}); 1238bdd1243dSDimitry Andric else 1239bdd1243dSDimitry Andric CTPOPActions 1240bdd1243dSDimitry Andric .legalFor({{v8s8, v8s8}, 1241bdd1243dSDimitry Andric {v16s8, v16s8}}) 1242fe6060f1SDimitry Andric .customFor({{s32, s32}, 1243fe6060f1SDimitry Andric {s64, s64}, 1244349cc55cSDimitry Andric {s128, s128}, 1245fe6060f1SDimitry Andric {v2s64, v2s64}, 1246fe6060f1SDimitry Andric {v2s32, v2s32}, 1247fe6060f1SDimitry Andric {v4s32, v4s32}, 1248fe6060f1SDimitry Andric {v4s16, v4s16}, 1249fe6060f1SDimitry Andric {v8s16, v8s16}}); 1250bdd1243dSDimitry Andric CTPOPActions 1251bdd1243dSDimitry Andric .clampScalar(0, s32, s128) 1252bdd1243dSDimitry Andric .widenScalarToNextPow2(0) 1253bdd1243dSDimitry Andric .minScalarEltSameAsIf(always, 1, 0) 1254bdd1243dSDimitry Andric .maxScalarEltSameAsIf(always, 1, 0); 1255fe6060f1SDimitry Andric 1256*0fca6ea1SDimitry Andric getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}) 1257*0fca6ea1SDimitry Andric .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8}) 1258*0fca6ea1SDimitry Andric .clampNumElements(0, v8s8, v16s8) 1259*0fca6ea1SDimitry Andric .clampNumElements(0, v4s16, v8s16) 1260*0fca6ea1SDimitry Andric .clampNumElements(0, v2s32, v4s32) 1261*0fca6ea1SDimitry Andric .clampMaxNumElements(0, s64, 2) 1262*0fca6ea1SDimitry Andric .moreElementsToNextPow2(0) 1263*0fca6ea1SDimitry Andric .lower(); 1264349cc55cSDimitry Andric 1265349cc55cSDimitry Andric // TODO: Libcall support for s128. 1266349cc55cSDimitry Andric // TODO: s16 should be legal with full FP16 support. 1267349cc55cSDimitry Andric getActionDefinitionsBuilder({G_LROUND, G_LLROUND}) 1268349cc55cSDimitry Andric .legalFor({{s64, s32}, {s64, s64}}); 1269349cc55cSDimitry Andric 1270bdd1243dSDimitry Andric // TODO: Custom legalization for mismatched types. 
getActionDefinitionsBuilder(G_FCOPYSIGN)
      .moreElementsIf(
          [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
          [=](const LegalityQuery &Query) {
            // Scalars are turned into a fixed vector of the same element
            // type: 4 elements for s16, 2 elements otherwise.
            const LLT Ty = Query.Types[0];
            return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
          })
      .lower();

  getActionDefinitionsBuilder(G_FMAD).lower();

  // Access to floating-point environment.
  getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
                               G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
      .libcall();

  getActionDefinitionsBuilder(G_IS_FPCLASS).lower();

  // G_PREFETCH is routed to legalizeCustom() (see the G_PREFETCH case there).
  getActionDefinitionsBuilder(G_PREFETCH).custom();

  getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();

  getLegacyLegalizerInfo().computeTables();
  verify(*ST.getInstrInfo());
}

// Entry point for every opcode whose rule set requests a custom action.
// Dispatches on MI's opcode to the matching legalize* helper and returns that
// helper's result; opcodes without a case return false.
bool AArch64LegalizerInfo::legalizeCustom(
    LegalizerHelper &Helper, MachineInstr &MI,
    LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  GISelChangeObserver &Observer = Helper.Observer;
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_GLOBAL_VALUE:
    return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
    return legalizeBitfieldExtract(MI, MRI, Helper);
  case TargetOpcode::G_FSHL:
  case TargetOpcode::G_FSHR:
    return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
  case TargetOpcode::G_ROTR:
    return legalizeRotate(MI, MRI, Helper);
  case TargetOpcode::G_CTPOP:
    return legalizeCTPOP(MI, MRI, Helper);
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    return legalizeAtomicCmpxchg128(MI, MRI, Helper);
  case TargetOpcode::G_CTTZ:
    return legalizeCTTZ(MI, Helper);
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    return legalizeMemOps(MI, Helper);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return legalizeExtractVectorElt(MI, MRI, Helper);
  case TargetOpcode::G_DYN_STACKALLOC:
    return legalizeDynStackAlloc(MI, Helper);
  case TargetOpcode::G_PREFETCH:
    return legalizePrefetch(MI, Helper);
  case TargetOpcode::G_ABS:
    return Helper.lowerAbsToCNeg(MI);
  case TargetOpcode::G_ICMP:
    return legalizeICMP(MI, MRI, MIRBuilder);
  }

  llvm_unreachable("expected switch to return");
}

// Custom legalization for G_FSHL / G_FSHR.
//
// Shift amounts that are not constant, or that are a multiple of the operation
// width, are handed to LegalizerHelper::lowerFunnelShiftAsShifts (the return
// value then reports whether that lowering produced Legalized). Every other
// case ends up as a G_FSHR whose amount is a 64-bit constant: a G_FSHR is
// updated in place, a G_FSHL is replaced by a new G_FSHR using
// (BitWidth - Amount) and the original instruction is erased.
bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
                                               MachineRegisterInfo &MRI,
                                               MachineIRBuilder &MIRBuilder,
                                               GISelChangeObserver &Observer,
                                               LegalizerHelper &Helper) const {
  assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
         MI.getOpcode() == TargetOpcode::G_FSHR);

  // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
  // lowering
  Register ShiftNo = MI.getOperand(3).getReg();
  LLT ShiftTy = MRI.getType(ShiftNo);
  auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);

  // Adjust shift amount according to Opcode (FSHL/FSHR)
  // Convert FSHL to FSHR
  LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
  // The operation width, expressed as an APInt with the shift amount's bit
  // width so it can be combined with the constant amount below.
  APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);

  // Lower non-constant shifts and leave zero shifts to the optimizer.
  if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
    return (Helper.lowerFunnelShiftAsShifts(MI) ==
            LegalizerHelper::LegalizeResult::Legalized);

  // Reduce the constant modulo the operation width.
  APInt Amount = VRegAndVal->Value.urem(BitWidth);

  Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;

  // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
  // in the range of 0 <-> BitWidth, it is legal
  if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
      VRegAndVal->Value.ult(BitWidth))
    return true;

  // Cast the ShiftNumber to a 64-bit type
  auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));

  if (MI.getOpcode() == TargetOpcode::G_FSHR) {
    // In-place operand change: bracket it with observer notifications.
    Observer.changingInstr(MI);
    MI.getOperand(3).setReg(Cast64.getReg(0));
    Observer.changedInstr(MI);
  }
  // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
  // instruction
  else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
    MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
                          {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
                           Cast64.getReg(0)});
    MI.eraseFromParent();
  }
  return true;
}

// Custom legalization for G_ICMP.
//
// Returns false unless the destination and source types have the same element
// size and element count and the destination is 64 or 128 bits wide. For an
// ICMP_NE predicate the compare is rebuilt as ICMP_EQ followed by a NOT and
// the original instruction is erased; all other predicates are left as is.
bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
                                        MachineRegisterInfo &MRI,
                                        MachineIRBuilder &MIRBuilder) const {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg1 = MI.getOperand(2).getReg();
  Register SrcReg2 = MI.getOperand(3).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg1);

  // Check the vector types are legal
  if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
      DstTy.getNumElements() != SrcTy.getNumElements() ||
      (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
    return false;

  // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
  // following passes
  CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE)
    return true;
  Register CmpReg =
      MIRBuilder
          .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
          .getReg(0);
  MIRBuilder.buildNot(DstReg, CmpReg);

  MI.eraseFromParent();
  return true;
}

// Custom legalization for G_ROTR: replaces the rotate-amount operand with a
// zero-extension of it to s64. Always returns true.
bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          LegalizerHelper &Helper) const {
  // To allow for imported patterns to match, we ensure that the rotate amount
  // is 64b with an extension.
  Register AmtReg = MI.getOperand(2).getReg();
  LLT AmtTy = MRI.getType(AmtReg);
  // AmtTy is only read by the asserts below; silence unused warnings when
  // asserts are compiled out.
  (void)AmtTy;
  assert(AmtTy.isScalar() && "Expected a scalar rotate");
  assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
  auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  Helper.Observer.changingInstr(MI);
  MI.getOperand(2).setReg(NewAmt.getReg(0));
  Helper.Observer.changedInstr(MI);
  return true;
}

// Custom legalization for G_GLOBAL_VALUE.
//
// Symbol operands, thread-local globals and globals classified with MO_GOT
// are left untouched. Otherwise the instruction is replaced by an ADRP
// (optionally followed by a MOVK for MO_TAGGED globals) plus a G_ADD_LOW
// defining the original destination register. Always returns true.
bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
  // G_ADD_LOW instructions.
  // By splitting this here, we can optimize accesses in the small code model by
  // folding in the G_ADD_LOW into the load/store offset.
  auto &GlobalOp = MI.getOperand(1);
  // Don't modify an intrinsic call.
  if (GlobalOp.isSymbol())
    return true;
  const auto* GV = GlobalOp.getGlobal();
  if (GV->isThreadLocal())
    return true; // Don't want to modify TLS vars.

  auto &TM = ST->getTargetLowering()->getTargetMachine();
  unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);

  if (OpFlags & AArch64II::MO_GOT)
    return true;

  auto Offset = GlobalOp.getOffset();
  Register DstReg = MI.getOperand(0).getReg();
  auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
                  .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
  // Set the regclass on the dest reg too.
  MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);

  // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
  // by creating a MOVK that sets bits 48-63 of the register to (global address
  // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
  // prevent an incorrect tag being generated during relocation when the
  // global appears before the code section. Without the offset, a global at
  // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
  // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
  // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
  // instead of `0xf`.
  // This assumes that we're in the small code model so we can assume a binary
  // size of <= 4GB, which makes the untagged PC relative offset positive. The
  // binary must also be loaded into address range [0, 2^48). Both of these
  // properties need to be ensured at runtime when using tagged addresses.
  if (OpFlags & AArch64II::MO_TAGGED) {
    assert(!Offset &&
           "Should not have folded in an offset for a tagged global!");
    // From here on ADRP names the MOVK result, so the G_ADD_LOW below consumes
    // the tagged value.
    ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
               .addGlobalAddress(GV, 0x100000000,
                                 AArch64II::MO_PREL | AArch64II::MO_G3)
               .addImm(48);
    MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
  }

  MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
      .addGlobalAddress(GV, Offset,
                        OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  MI.eraseFromParent();
  return true;
}

// Legalizes intrinsic instructions, dispatching on the intrinsic ID.
//
// Intrinsics without a case below are left unchanged and reported as handled
// (the function falls through to `return true`). Intrinsic::vector_reverse
// returns false.
bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                             MachineInstr &MI) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::vacopy: {
    // Copy the va_list as one scalar of VaListSize bytes: load it through
    // operand 2 and store it through operand 1. The size is pointer-sized on
    // Darwin and Windows targets, otherwise 20 bytes (ILP32) or 32 bytes.
    unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
    unsigned VaListSize =
        (ST->isTargetDarwin() || ST->isTargetWindows())
            ? PtrSize
            : ST->isTargetILP32() ? 20 : 32;

    MachineFunction &MF = *MI.getMF();
    auto Val = MF.getRegInfo().createGenericVirtualRegister(
        LLT::scalar(VaListSize * 8));
    MachineIRBuilder MIB(MI);
    MIB.buildLoad(Val, MI.getOperand(2),
                  *MF.getMachineMemOperand(MachinePointerInfo(),
                                           MachineMemOperand::MOLoad,
                                           VaListSize, Align(PtrSize)));
    MIB.buildStore(Val, MI.getOperand(1),
                   *MF.getMachineMemOperand(MachinePointerInfo(),
                                            MachineMemOperand::MOStore,
                                            VaListSize, Align(PtrSize)));
    MI.eraseFromParent();
    return true;
  }
  case Intrinsic::get_dynamic_area_offset: {
    // Replaced by the constant 0.
    MachineIRBuilder &MIB = Helper.MIRBuilder;
    MIB.buildConstant(MI.getOperand(0).getReg(), 0);
    MI.eraseFromParent();
    return true;
  }
  case Intrinsic::aarch64_mops_memset_tag: {
    assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
    // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
    // the instruction).
    MachineIRBuilder MIB(MI);
    auto &Value = MI.getOperand(3);
    Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
    Value.setReg(ExtValueReg);
    return true;
  }
  case Intrinsic::aarch64_prefetch: {
    // Pack the four immediate operands into a single PrfOp immediate and emit
    // G_AARCH64_PREFETCH with that immediate and the address operand.
    MachineIRBuilder MIB(MI);
    auto &AddrVal = MI.getOperand(1);

    int64_t IsWrite = MI.getOperand(2).getImm();
    int64_t Target = MI.getOperand(3).getImm();
    int64_t IsStream = MI.getOperand(4).getImm();
    int64_t IsData = MI.getOperand(5).getImm();

    unsigned PrfOp = (IsWrite << 4) |    // Load/Store bit
                     (!IsData << 3) |    // IsDataCache bit
                     (Target << 1) |     // Cache level bits
                     (unsigned)IsStream; // Stream bit

    MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
    MI.eraseFromParent();
    return true;
  }
  case Intrinsic::aarch64_neon_uaddv:
  case Intrinsic::aarch64_neon_saddv:
  case Intrinsic::aarch64_neon_umaxv:
  case Intrinsic::aarch64_neon_smaxv:
  case Intrinsic::aarch64_neon_uminv:
  case Intrinsic::aarch64_neon_sminv: {
    // Make the intrinsic's result type equal to the element type of its
    // vector operand. If it already is, nothing changes; otherwise the
    // intrinsic defines a fresh register of the element type and an
    // ext-or-trunc into the original destination is inserted after it.
    MachineIRBuilder MIB(MI);
    MachineRegisterInfo &MRI = *MIB.getMRI();
    bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
                    IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
                    IntrinsicID == Intrinsic::aarch64_neon_sminv;

    auto OldDst = MI.getOperand(0).getReg();
    auto OldDstTy = MRI.getType(OldDst);
    LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
    if (OldDstTy == NewDstTy)
      return true;

    auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);

    Helper.Observer.changingInstr(MI);
    MI.getOperand(0).setReg(NewDst);
    Helper.Observer.changedInstr(MI);

    // Advance the insertion point by one so the conversion is emitted after
    // MI rather than before it.
    MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
    MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
                        OldDst, NewDst);

    return true;
  }
  case Intrinsic::aarch64_neon_uaddlp:
  case Intrinsic::aarch64_neon_saddlp: {
    // Replace the intrinsic with the matching G_UADDLP / G_SADDLP opcode.
    MachineIRBuilder MIB(MI);

    unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
                       ? AArch64::G_UADDLP
                       : AArch64::G_SADDLP;
    MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
    MI.eraseFromParent();

    return true;
  }
  case Intrinsic::aarch64_neon_uaddlv:
  case Intrinsic::aarch64_neon_saddlv: {
    // Replace the intrinsic with G_UADDLV / G_SADDLV. That instruction is
    // built with a vector result (MidTy); element 0 is then extracted and
    // truncated or copied into the original destination.
    MachineIRBuilder MIB(MI);
    MachineRegisterInfo &MRI = *MIB.getMRI();

    unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
                       ? AArch64::G_UADDLV
                       : AArch64::G_SADDLV;
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(2).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // Scalar destinations of at most 32 bits use a v4s32 intermediate and an
    // s32 extract; everything else uses v2s64 and s64.
    LLT MidTy, ExtTy;
    if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
      MidTy = LLT::fixed_vector(4, 32);
      ExtTy = LLT::scalar(32);
    } else {
      MidTy = LLT::fixed_vector(2, 64);
      ExtTy = LLT::scalar(64);
    }

    Register MidReg =
        MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
    Register ZeroReg =
        MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
    Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
                                     {MidReg, ZeroReg})
                          .getReg(0);

    if (DstTy.getScalarSizeInBits() < 32)
      MIB.buildTrunc(DstReg, ExtReg);
    else
      MIB.buildCopy(DstReg, ExtReg);

    MI.eraseFromParent();

    return true;
  }
  case Intrinsic::aarch64_neon_smax:
  case Intrinsic::aarch64_neon_smin:
  case Intrinsic::aarch64_neon_umax:
  case Intrinsic::aarch64_neon_umin:
  case Intrinsic::aarch64_neon_fmax:
  case Intrinsic::aarch64_neon_fmin:
  case Intrinsic::aarch64_neon_fmaxnm:
  case Intrinsic::aarch64_neon_fminnm: {
    // Replace each min/max intrinsic with the corresponding generic opcode,
    // built from the same destination (operand 0) and sources (operands 2, 3).
    MachineIRBuilder MIB(MI);
    if (IntrinsicID == Intrinsic::aarch64_neon_smax)
      MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
    else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
      MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
    else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
      MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
    else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
      MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
    else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
      MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
                     {MI.getOperand(2), MI.getOperand(3)});
    else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
      MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
                     {MI.getOperand(2), MI.getOperand(3)});
    else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
      MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)},
                     {MI.getOperand(2), MI.getOperand(3)});
    else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
      MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)},
                     {MI.getOperand(2), MI.getOperand(3)});
    MI.eraseFromParent();
    return true;
  }
  case Intrinsic::vector_reverse:
    // TODO: Add support for vector_reverse
    return false;
  }

  return true;
}

// Custom legalization for G_SHL / G_ASHR / G_LSHR. When the shift amount is a
// constant no greater than 31 it is replaced by a 64-bit G_CONSTANT; otherwise
// the instruction is left unchanged. Always returns true.
bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
  if (!VRegAndVal)
    return true;
  // Check the shift amount is in range for an immediate form.
  int64_t Amount = VRegAndVal->Value.getSExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

// Splits an address into a base register and a constant offset.
//
// Base/Offset default to Root/0. If Root matches m_GPtrAdd(reg, constant) and
// the constant satisfies isShiftedInt<7, 3>, Base and Offset are set to that
// register and constant instead.
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
                                MachineRegisterInfo &MRI) {
  Base = Root;
  Offset = 0;

  Register NewBase;
  int64_t NewOffset;
  if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
      isShiftedInt<7, 3>(NewOffset)) {
    Base = NewBase;
    Offset = NewOffset;
  }
}

// FIXME: This should be removed and replaced with the generic bitcast legalize
// action.
17305ffd83dbSDimitry Andric bool AArch64LegalizerInfo::legalizeLoadStore( 17315ffd83dbSDimitry Andric MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, 17325ffd83dbSDimitry Andric GISelChangeObserver &Observer) const { 17335ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_STORE || 17345ffd83dbSDimitry Andric MI.getOpcode() == TargetOpcode::G_LOAD); 17355ffd83dbSDimitry Andric // Here we just try to handle vector loads/stores where our value type might 17365ffd83dbSDimitry Andric // have pointer elements, which the SelectionDAG importer can't handle. To 17375ffd83dbSDimitry Andric // allow the existing patterns for s64 to fire for p0, we just try to bitcast 17385ffd83dbSDimitry Andric // the value to use s64 types. 17395ffd83dbSDimitry Andric 17405ffd83dbSDimitry Andric // Custom legalization requires the instruction, if not deleted, must be fully 17415ffd83dbSDimitry Andric // legalized. In order to allow further legalization of the inst, we create 17425ffd83dbSDimitry Andric // a new instruction and erase the existing one. 
17435ffd83dbSDimitry Andric 17445ffd83dbSDimitry Andric Register ValReg = MI.getOperand(0).getReg(); 17455ffd83dbSDimitry Andric const LLT ValTy = MRI.getType(ValReg); 17465ffd83dbSDimitry Andric 1747349cc55cSDimitry Andric if (ValTy == LLT::scalar(128)) { 174806c3fb27SDimitry Andric 174906c3fb27SDimitry Andric AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering(); 175006c3fb27SDimitry Andric bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; 175106c3fb27SDimitry Andric bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire; 175206c3fb27SDimitry Andric bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release; 175306c3fb27SDimitry Andric bool IsRcpC3 = 175406c3fb27SDimitry Andric ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease); 175506c3fb27SDimitry Andric 1756349cc55cSDimitry Andric LLT s64 = LLT::scalar(64); 175706c3fb27SDimitry Andric 175806c3fb27SDimitry Andric unsigned Opcode; 175906c3fb27SDimitry Andric if (IsRcpC3) { 176006c3fb27SDimitry Andric Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX; 176106c3fb27SDimitry Andric } else { 176206c3fb27SDimitry Andric // For LSE2, loads/stores should have been converted to monotonic and had 176306c3fb27SDimitry Andric // a fence inserted after them. 176406c3fb27SDimitry Andric assert(Ordering == AtomicOrdering::Monotonic || 176506c3fb27SDimitry Andric Ordering == AtomicOrdering::Unordered); 176606c3fb27SDimitry Andric assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2"); 176706c3fb27SDimitry Andric 176806c3fb27SDimitry Andric Opcode = IsLoad ? 
AArch64::LDPXi : AArch64::STPXi; 176906c3fb27SDimitry Andric } 177006c3fb27SDimitry Andric 1771349cc55cSDimitry Andric MachineInstrBuilder NewI; 177206c3fb27SDimitry Andric if (IsLoad) { 177306c3fb27SDimitry Andric NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {}); 1774bdd1243dSDimitry Andric MIRBuilder.buildMergeLikeInstr( 1775bdd1243dSDimitry Andric ValReg, {NewI->getOperand(0), NewI->getOperand(1)}); 1776349cc55cSDimitry Andric } else { 1777349cc55cSDimitry Andric auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0)); 1778349cc55cSDimitry Andric NewI = MIRBuilder.buildInstr( 177906c3fb27SDimitry Andric Opcode, {}, {Split->getOperand(0), Split->getOperand(1)}); 1780349cc55cSDimitry Andric } 178106c3fb27SDimitry Andric 178206c3fb27SDimitry Andric if (IsRcpC3) { 178306c3fb27SDimitry Andric NewI.addUse(MI.getOperand(1).getReg()); 178406c3fb27SDimitry Andric } else { 1785349cc55cSDimitry Andric Register Base; 1786349cc55cSDimitry Andric int Offset; 1787349cc55cSDimitry Andric matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI); 1788349cc55cSDimitry Andric NewI.addUse(Base); 1789349cc55cSDimitry Andric NewI.addImm(Offset / 8); 179006c3fb27SDimitry Andric } 1791349cc55cSDimitry Andric 1792349cc55cSDimitry Andric NewI.cloneMemRefs(MI); 1793349cc55cSDimitry Andric constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(), 1794349cc55cSDimitry Andric *MRI.getTargetRegisterInfo(), 1795349cc55cSDimitry Andric *ST->getRegBankInfo()); 1796349cc55cSDimitry Andric MI.eraseFromParent(); 1797349cc55cSDimitry Andric return true; 1798349cc55cSDimitry Andric } 1799349cc55cSDimitry Andric 1800*0fca6ea1SDimitry Andric if (!ValTy.isPointerVector() || 18015ffd83dbSDimitry Andric ValTy.getElementType().getAddressSpace() != 0) { 18025ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store"); 18035ffd83dbSDimitry Andric return false; 18045ffd83dbSDimitry Andric } 18055ffd83dbSDimitry Andric 18065ffd83dbSDimitry Andric 
unsigned PtrSize = ValTy.getElementType().getSizeInBits(); 1807fe6060f1SDimitry Andric const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize); 18085ffd83dbSDimitry Andric auto &MMO = **MI.memoperands_begin(); 1809fe6060f1SDimitry Andric MMO.setType(NewTy); 1810fe6060f1SDimitry Andric 18115ffd83dbSDimitry Andric if (MI.getOpcode() == TargetOpcode::G_STORE) { 18125ffd83dbSDimitry Andric auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg); 18135ffd83dbSDimitry Andric MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO); 18145ffd83dbSDimitry Andric } else { 18155ffd83dbSDimitry Andric auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO); 18165ffd83dbSDimitry Andric MIRBuilder.buildBitcast(ValReg, NewLoad); 18175ffd83dbSDimitry Andric } 18185ffd83dbSDimitry Andric MI.eraseFromParent(); 18195ffd83dbSDimitry Andric return true; 18205ffd83dbSDimitry Andric } 18215ffd83dbSDimitry Andric 18225ffd83dbSDimitry Andric bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, 18235ffd83dbSDimitry Andric MachineRegisterInfo &MRI, 18245ffd83dbSDimitry Andric MachineIRBuilder &MIRBuilder) const { 18255ffd83dbSDimitry Andric MachineFunction &MF = MIRBuilder.getMF(); 18265ffd83dbSDimitry Andric Align Alignment(MI.getOperand(2).getImm()); 18275ffd83dbSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 18285ffd83dbSDimitry Andric Register ListPtr = MI.getOperand(1).getReg(); 18295ffd83dbSDimitry Andric 18305ffd83dbSDimitry Andric LLT PtrTy = MRI.getType(ListPtr); 18315ffd83dbSDimitry Andric LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); 18325ffd83dbSDimitry Andric 18335ffd83dbSDimitry Andric const unsigned PtrSize = PtrTy.getSizeInBits() / 8; 18345ffd83dbSDimitry Andric const Align PtrAlign = Align(PtrSize); 18355ffd83dbSDimitry Andric auto List = MIRBuilder.buildLoad( 18365ffd83dbSDimitry Andric PtrTy, ListPtr, 18375ffd83dbSDimitry Andric *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad, 1838fe6060f1SDimitry Andric 
PtrTy, PtrAlign)); 18395ffd83dbSDimitry Andric 18405ffd83dbSDimitry Andric MachineInstrBuilder DstPtr; 18415ffd83dbSDimitry Andric if (Alignment > PtrAlign) { 18425ffd83dbSDimitry Andric // Realign the list to the actual required alignment. 18435ffd83dbSDimitry Andric auto AlignMinus1 = 18445ffd83dbSDimitry Andric MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1); 18455ffd83dbSDimitry Andric auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0)); 18465ffd83dbSDimitry Andric DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment)); 18475ffd83dbSDimitry Andric } else 18485ffd83dbSDimitry Andric DstPtr = List; 18495ffd83dbSDimitry Andric 1850fe6060f1SDimitry Andric LLT ValTy = MRI.getType(Dst); 1851fe6060f1SDimitry Andric uint64_t ValSize = ValTy.getSizeInBits() / 8; 18525ffd83dbSDimitry Andric MIRBuilder.buildLoad( 18535ffd83dbSDimitry Andric Dst, DstPtr, 18545ffd83dbSDimitry Andric *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad, 1855fe6060f1SDimitry Andric ValTy, std::max(Alignment, PtrAlign))); 18565ffd83dbSDimitry Andric 18575ffd83dbSDimitry Andric auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign)); 18585ffd83dbSDimitry Andric 18595ffd83dbSDimitry Andric auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0)); 18605ffd83dbSDimitry Andric 18615ffd83dbSDimitry Andric MIRBuilder.buildStore(NewList, ListPtr, 18625ffd83dbSDimitry Andric *MF.getMachineMemOperand(MachinePointerInfo(), 18635ffd83dbSDimitry Andric MachineMemOperand::MOStore, 1864fe6060f1SDimitry Andric PtrTy, PtrAlign)); 18655ffd83dbSDimitry Andric 18665ffd83dbSDimitry Andric MI.eraseFromParent(); 18675ffd83dbSDimitry Andric return true; 18685ffd83dbSDimitry Andric } 1869fe6060f1SDimitry Andric 1870fe6060f1SDimitry Andric bool AArch64LegalizerInfo::legalizeBitfieldExtract( 1871fe6060f1SDimitry Andric MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const { 1872fe6060f1SDimitry 
Andric // Only legal if we can select immediate forms. 1873fe6060f1SDimitry Andric // TODO: Lower this otherwise. 1874349cc55cSDimitry Andric return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) && 1875349cc55cSDimitry Andric getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI); 1876fe6060f1SDimitry Andric } 1877fe6060f1SDimitry Andric 1878fe6060f1SDimitry Andric bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI, 1879fe6060f1SDimitry Andric MachineRegisterInfo &MRI, 1880fe6060f1SDimitry Andric LegalizerHelper &Helper) const { 1881bdd1243dSDimitry Andric // When there is no integer popcount instruction (FEAT_CSSC isn't available), 1882bdd1243dSDimitry Andric // it can be more efficiently lowered to the following sequence that uses 1883bdd1243dSDimitry Andric // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD 1884bdd1243dSDimitry Andric // registers are cheap. 1885fe6060f1SDimitry Andric // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd 1886fe6060f1SDimitry Andric // CNT V0.8B, V0.8B // 8xbyte pop-counts 1887fe6060f1SDimitry Andric // ADDV B0, V0.8B // sum 8xbyte pop-counts 1888fe6060f1SDimitry Andric // UMOV X0, V0.B[0] // copy byte result back to integer reg 1889fe6060f1SDimitry Andric // 1890fe6060f1SDimitry Andric // For 128 bit vector popcounts, we lower to the following sequence: 1891fe6060f1SDimitry Andric // cnt.16b v0, v0 // v8s16, v4s32, v2s64 1892fe6060f1SDimitry Andric // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64 1893fe6060f1SDimitry Andric // uaddlp.4s v0, v0 // v4s32, v2s64 1894fe6060f1SDimitry Andric // uaddlp.2d v0, v0 // v2s64 1895fe6060f1SDimitry Andric // 1896fe6060f1SDimitry Andric // For 64 bit vector popcounts, we lower to the following sequence: 1897fe6060f1SDimitry Andric // cnt.8b v0, v0 // v4s16, v2s32 1898fe6060f1SDimitry Andric // uaddlp.4h v0, v0 // v4s16, v2s32 1899fe6060f1SDimitry Andric // uaddlp.2s v0, v0 // v2s32 1900fe6060f1SDimitry Andric 
1901fe6060f1SDimitry Andric MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; 1902fe6060f1SDimitry Andric Register Dst = MI.getOperand(0).getReg(); 1903fe6060f1SDimitry Andric Register Val = MI.getOperand(1).getReg(); 1904fe6060f1SDimitry Andric LLT Ty = MRI.getType(Val); 1905bdd1243dSDimitry Andric unsigned Size = Ty.getSizeInBits(); 1906fe6060f1SDimitry Andric 1907fe6060f1SDimitry Andric assert(Ty == MRI.getType(Dst) && 1908fe6060f1SDimitry Andric "Expected src and dst to have the same type!"); 1909bdd1243dSDimitry Andric 1910bdd1243dSDimitry Andric if (ST->hasCSSC() && Ty.isScalar() && Size == 128) { 1911bdd1243dSDimitry Andric LLT s64 = LLT::scalar(64); 1912bdd1243dSDimitry Andric 1913bdd1243dSDimitry Andric auto Split = MIRBuilder.buildUnmerge(s64, Val); 1914bdd1243dSDimitry Andric auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0)); 1915bdd1243dSDimitry Andric auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1)); 1916bdd1243dSDimitry Andric auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2); 1917bdd1243dSDimitry Andric 1918bdd1243dSDimitry Andric MIRBuilder.buildZExt(Dst, Add); 1919bdd1243dSDimitry Andric MI.eraseFromParent(); 1920bdd1243dSDimitry Andric return true; 1921bdd1243dSDimitry Andric } 1922bdd1243dSDimitry Andric 1923bdd1243dSDimitry Andric if (!ST->hasNEON() || 1924bdd1243dSDimitry Andric MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) { 1925bdd1243dSDimitry Andric // Use generic lowering when custom lowering is not possible. 1926bdd1243dSDimitry Andric return Ty.isScalar() && (Size == 32 || Size == 64) && 1927bdd1243dSDimitry Andric Helper.lowerBitCount(MI) == 1928bdd1243dSDimitry Andric LegalizerHelper::LegalizeResult::Legalized; 1929bdd1243dSDimitry Andric } 1930fe6060f1SDimitry Andric 1931fe6060f1SDimitry Andric // Pre-conditioning: widen Val up to the nearest vector type. 
1932fe6060f1SDimitry Andric // s32,s64,v4s16,v2s32 -> v8i8 1933fe6060f1SDimitry Andric // v8s16,v4s32,v2s64 -> v16i8 1934fe6060f1SDimitry Andric LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8); 1935fe6060f1SDimitry Andric if (Ty.isScalar()) { 1936349cc55cSDimitry Andric assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!"); 1937fe6060f1SDimitry Andric if (Size == 32) { 1938fe6060f1SDimitry Andric Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0); 1939fe6060f1SDimitry Andric } 1940fe6060f1SDimitry Andric } 1941fe6060f1SDimitry Andric Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0); 1942fe6060f1SDimitry Andric 1943fe6060f1SDimitry Andric // Count bits in each byte-sized lane. 1944fe6060f1SDimitry Andric auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val); 1945fe6060f1SDimitry Andric 1946fe6060f1SDimitry Andric // Sum across lanes. 1947*0fca6ea1SDimitry Andric 1948*0fca6ea1SDimitry Andric if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 && 1949*0fca6ea1SDimitry Andric Ty.getScalarSizeInBits() != 16) { 1950*0fca6ea1SDimitry Andric LLT Dt = Ty == LLT::fixed_vector(2, 64) ? 
LLT::fixed_vector(4, 32) : Ty; 1951*0fca6ea1SDimitry Andric auto Zeros = MIRBuilder.buildConstant(Dt, 0); 1952*0fca6ea1SDimitry Andric auto Ones = MIRBuilder.buildConstant(VTy, 1); 1953*0fca6ea1SDimitry Andric MachineInstrBuilder Sum; 1954*0fca6ea1SDimitry Andric 1955*0fca6ea1SDimitry Andric if (Ty == LLT::fixed_vector(2, 64)) { 1956*0fca6ea1SDimitry Andric auto UDOT = 1957*0fca6ea1SDimitry Andric MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP}); 1958*0fca6ea1SDimitry Andric Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT}); 1959*0fca6ea1SDimitry Andric } else if (Ty == LLT::fixed_vector(4, 32)) { 1960*0fca6ea1SDimitry Andric Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP}); 1961*0fca6ea1SDimitry Andric } else if (Ty == LLT::fixed_vector(2, 32)) { 1962*0fca6ea1SDimitry Andric Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP}); 1963*0fca6ea1SDimitry Andric } else { 1964*0fca6ea1SDimitry Andric llvm_unreachable("unexpected vector shape"); 1965*0fca6ea1SDimitry Andric } 1966*0fca6ea1SDimitry Andric 1967*0fca6ea1SDimitry Andric Sum->getOperand(0).setReg(Dst); 1968*0fca6ea1SDimitry Andric MI.eraseFromParent(); 1969*0fca6ea1SDimitry Andric return true; 1970*0fca6ea1SDimitry Andric } 1971*0fca6ea1SDimitry Andric 1972fe6060f1SDimitry Andric Register HSum = CTPOP.getReg(0); 1973fe6060f1SDimitry Andric unsigned Opc; 1974fe6060f1SDimitry Andric SmallVector<LLT> HAddTys; 1975fe6060f1SDimitry Andric if (Ty.isScalar()) { 1976fe6060f1SDimitry Andric Opc = Intrinsic::aarch64_neon_uaddlv; 1977fe6060f1SDimitry Andric HAddTys.push_back(LLT::scalar(32)); 1978fe6060f1SDimitry Andric } else if (Ty == LLT::fixed_vector(8, 16)) { 1979fe6060f1SDimitry Andric Opc = Intrinsic::aarch64_neon_uaddlp; 1980fe6060f1SDimitry Andric HAddTys.push_back(LLT::fixed_vector(8, 16)); 1981fe6060f1SDimitry Andric } else if (Ty == LLT::fixed_vector(4, 32)) { 1982fe6060f1SDimitry Andric Opc = Intrinsic::aarch64_neon_uaddlp; 
1983fe6060f1SDimitry Andric HAddTys.push_back(LLT::fixed_vector(8, 16)); 1984fe6060f1SDimitry Andric HAddTys.push_back(LLT::fixed_vector(4, 32)); 1985fe6060f1SDimitry Andric } else if (Ty == LLT::fixed_vector(2, 64)) { 1986fe6060f1SDimitry Andric Opc = Intrinsic::aarch64_neon_uaddlp; 1987fe6060f1SDimitry Andric HAddTys.push_back(LLT::fixed_vector(8, 16)); 1988fe6060f1SDimitry Andric HAddTys.push_back(LLT::fixed_vector(4, 32)); 1989fe6060f1SDimitry Andric HAddTys.push_back(LLT::fixed_vector(2, 64)); 1990fe6060f1SDimitry Andric } else if (Ty == LLT::fixed_vector(4, 16)) { 1991fe6060f1SDimitry Andric Opc = Intrinsic::aarch64_neon_uaddlp; 1992fe6060f1SDimitry Andric HAddTys.push_back(LLT::fixed_vector(4, 16)); 1993fe6060f1SDimitry Andric } else if (Ty == LLT::fixed_vector(2, 32)) { 1994fe6060f1SDimitry Andric Opc = Intrinsic::aarch64_neon_uaddlp; 1995fe6060f1SDimitry Andric HAddTys.push_back(LLT::fixed_vector(4, 16)); 1996fe6060f1SDimitry Andric HAddTys.push_back(LLT::fixed_vector(2, 32)); 1997fe6060f1SDimitry Andric } else 1998fe6060f1SDimitry Andric llvm_unreachable("unexpected vector shape"); 1999fe6060f1SDimitry Andric MachineInstrBuilder UADD; 2000fe6060f1SDimitry Andric for (LLT HTy : HAddTys) { 20015f757f3fSDimitry Andric UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum); 2002fe6060f1SDimitry Andric HSum = UADD.getReg(0); 2003fe6060f1SDimitry Andric } 2004fe6060f1SDimitry Andric 2005fe6060f1SDimitry Andric // Post-conditioning. 
2006349cc55cSDimitry Andric if (Ty.isScalar() && (Size == 64 || Size == 128)) 2007fe6060f1SDimitry Andric MIRBuilder.buildZExt(Dst, UADD); 2008fe6060f1SDimitry Andric else 2009fe6060f1SDimitry Andric UADD->getOperand(0).setReg(Dst); 2010fe6060f1SDimitry Andric MI.eraseFromParent(); 2011fe6060f1SDimitry Andric return true; 2012fe6060f1SDimitry Andric } 2013fe6060f1SDimitry Andric 2014fe6060f1SDimitry Andric bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128( 2015fe6060f1SDimitry Andric MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const { 2016fe6060f1SDimitry Andric MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; 2017fe6060f1SDimitry Andric LLT s64 = LLT::scalar(64); 2018fe6060f1SDimitry Andric auto Addr = MI.getOperand(1).getReg(); 2019fe6060f1SDimitry Andric auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2)); 2020fe6060f1SDimitry Andric auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3)); 2021fe6060f1SDimitry Andric auto DstLo = MRI.createGenericVirtualRegister(s64); 2022fe6060f1SDimitry Andric auto DstHi = MRI.createGenericVirtualRegister(s64); 2023fe6060f1SDimitry Andric 2024fe6060f1SDimitry Andric MachineInstrBuilder CAS; 2025fe6060f1SDimitry Andric if (ST->hasLSE()) { 2026fe6060f1SDimitry Andric // We have 128-bit CASP instructions taking XSeqPair registers, which are 2027fe6060f1SDimitry Andric // s128. We need the merge/unmerge to bracket the expansion and pair up with 2028fe6060f1SDimitry Andric // the rest of the MIR so we must reassemble the extracted registers into a 2029fe6060f1SDimitry Andric // 128-bit known-regclass one with code like this: 2030fe6060f1SDimitry Andric // 2031fe6060f1SDimitry Andric // %in1 = REG_SEQUENCE Lo, Hi ; One for each input 2032fe6060f1SDimitry Andric // %out = CASP %in1, ... 
2033fe6060f1SDimitry Andric // %OldLo = G_EXTRACT %out, 0 2034fe6060f1SDimitry Andric // %OldHi = G_EXTRACT %out, 64 2035fe6060f1SDimitry Andric auto Ordering = (*MI.memoperands_begin())->getMergedOrdering(); 2036fe6060f1SDimitry Andric unsigned Opcode; 2037fe6060f1SDimitry Andric switch (Ordering) { 2038fe6060f1SDimitry Andric case AtomicOrdering::Acquire: 2039fe6060f1SDimitry Andric Opcode = AArch64::CASPAX; 2040fe6060f1SDimitry Andric break; 2041fe6060f1SDimitry Andric case AtomicOrdering::Release: 2042fe6060f1SDimitry Andric Opcode = AArch64::CASPLX; 2043fe6060f1SDimitry Andric break; 2044fe6060f1SDimitry Andric case AtomicOrdering::AcquireRelease: 2045fe6060f1SDimitry Andric case AtomicOrdering::SequentiallyConsistent: 2046fe6060f1SDimitry Andric Opcode = AArch64::CASPALX; 2047fe6060f1SDimitry Andric break; 2048fe6060f1SDimitry Andric default: 2049fe6060f1SDimitry Andric Opcode = AArch64::CASPX; 2050fe6060f1SDimitry Andric break; 2051fe6060f1SDimitry Andric } 2052fe6060f1SDimitry Andric 2053fe6060f1SDimitry Andric LLT s128 = LLT::scalar(128); 2054fe6060f1SDimitry Andric auto CASDst = MRI.createGenericVirtualRegister(s128); 2055fe6060f1SDimitry Andric auto CASDesired = MRI.createGenericVirtualRegister(s128); 2056fe6060f1SDimitry Andric auto CASNew = MRI.createGenericVirtualRegister(s128); 2057fe6060f1SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {}) 2058fe6060f1SDimitry Andric .addUse(DesiredI->getOperand(0).getReg()) 2059fe6060f1SDimitry Andric .addImm(AArch64::sube64) 2060fe6060f1SDimitry Andric .addUse(DesiredI->getOperand(1).getReg()) 2061fe6060f1SDimitry Andric .addImm(AArch64::subo64); 2062fe6060f1SDimitry Andric MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {}) 2063fe6060f1SDimitry Andric .addUse(NewI->getOperand(0).getReg()) 2064fe6060f1SDimitry Andric .addImm(AArch64::sube64) 2065fe6060f1SDimitry Andric .addUse(NewI->getOperand(1).getReg()) 2066fe6060f1SDimitry Andric .addImm(AArch64::subo64); 
2067fe6060f1SDimitry Andric 2068fe6060f1SDimitry Andric CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr}); 2069fe6060f1SDimitry Andric 2070fe6060f1SDimitry Andric MIRBuilder.buildExtract({DstLo}, {CASDst}, 0); 2071fe6060f1SDimitry Andric MIRBuilder.buildExtract({DstHi}, {CASDst}, 64); 2072fe6060f1SDimitry Andric } else { 2073fe6060f1SDimitry Andric // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP 2074fe6060f1SDimitry Andric // can take arbitrary registers so it just has the normal GPR64 operands the 2075fe6060f1SDimitry Andric // rest of AArch64 is expecting. 2076fe6060f1SDimitry Andric auto Ordering = (*MI.memoperands_begin())->getMergedOrdering(); 2077fe6060f1SDimitry Andric unsigned Opcode; 2078fe6060f1SDimitry Andric switch (Ordering) { 2079fe6060f1SDimitry Andric case AtomicOrdering::Acquire: 2080fe6060f1SDimitry Andric Opcode = AArch64::CMP_SWAP_128_ACQUIRE; 2081fe6060f1SDimitry Andric break; 2082fe6060f1SDimitry Andric case AtomicOrdering::Release: 2083fe6060f1SDimitry Andric Opcode = AArch64::CMP_SWAP_128_RELEASE; 2084fe6060f1SDimitry Andric break; 2085fe6060f1SDimitry Andric case AtomicOrdering::AcquireRelease: 2086fe6060f1SDimitry Andric case AtomicOrdering::SequentiallyConsistent: 2087fe6060f1SDimitry Andric Opcode = AArch64::CMP_SWAP_128; 2088fe6060f1SDimitry Andric break; 2089fe6060f1SDimitry Andric default: 2090fe6060f1SDimitry Andric Opcode = AArch64::CMP_SWAP_128_MONOTONIC; 2091fe6060f1SDimitry Andric break; 2092fe6060f1SDimitry Andric } 2093fe6060f1SDimitry Andric 2094fe6060f1SDimitry Andric auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 2095fe6060f1SDimitry Andric CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch}, 2096fe6060f1SDimitry Andric {Addr, DesiredI->getOperand(0), 2097fe6060f1SDimitry Andric DesiredI->getOperand(1), NewI->getOperand(0), 2098fe6060f1SDimitry Andric NewI->getOperand(1)}); 2099fe6060f1SDimitry Andric } 2100fe6060f1SDimitry Andric 
2101fe6060f1SDimitry Andric CAS.cloneMemRefs(MI); 2102fe6060f1SDimitry Andric constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(), 2103fe6060f1SDimitry Andric *MRI.getTargetRegisterInfo(), 2104fe6060f1SDimitry Andric *ST->getRegBankInfo()); 2105fe6060f1SDimitry Andric 2106bdd1243dSDimitry Andric MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi}); 2107fe6060f1SDimitry Andric MI.eraseFromParent(); 2108fe6060f1SDimitry Andric return true; 2109fe6060f1SDimitry Andric } 2110fe6060f1SDimitry Andric 2111fe6060f1SDimitry Andric bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI, 2112fe6060f1SDimitry Andric LegalizerHelper &Helper) const { 2113fe6060f1SDimitry Andric MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; 2114fe6060f1SDimitry Andric MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 2115fe6060f1SDimitry Andric LLT Ty = MRI.getType(MI.getOperand(1).getReg()); 2116fe6060f1SDimitry Andric auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1)); 2117fe6060f1SDimitry Andric MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse); 2118fe6060f1SDimitry Andric MI.eraseFromParent(); 2119fe6060f1SDimitry Andric return true; 2120fe6060f1SDimitry Andric } 21211fd87a68SDimitry Andric 21221fd87a68SDimitry Andric bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI, 21231fd87a68SDimitry Andric LegalizerHelper &Helper) const { 21241fd87a68SDimitry Andric MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; 21251fd87a68SDimitry Andric 21261fd87a68SDimitry Andric // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic 21271fd87a68SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_MEMSET) { 21285f757f3fSDimitry Andric // Anyext the value being set to 64 bit (only the bottom 8 bits are read by 21295f757f3fSDimitry Andric // the instruction). 
21301fd87a68SDimitry Andric auto &Value = MI.getOperand(1); 21315f757f3fSDimitry Andric Register ExtValueReg = 21321fd87a68SDimitry Andric MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0); 21335f757f3fSDimitry Andric Value.setReg(ExtValueReg); 21341fd87a68SDimitry Andric return true; 21351fd87a68SDimitry Andric } 21361fd87a68SDimitry Andric 21371fd87a68SDimitry Andric return false; 21381fd87a68SDimitry Andric } 2139bdd1243dSDimitry Andric 21405f757f3fSDimitry Andric bool AArch64LegalizerInfo::legalizeExtractVectorElt( 21415f757f3fSDimitry Andric MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const { 21425f757f3fSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT); 21435f757f3fSDimitry Andric auto VRegAndVal = 21445f757f3fSDimitry Andric getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); 21455f757f3fSDimitry Andric if (VRegAndVal) 21465f757f3fSDimitry Andric return true; 21475f757f3fSDimitry Andric return Helper.lowerExtractInsertVectorElt(MI) != 21485f757f3fSDimitry Andric LegalizerHelper::LegalizeResult::UnableToLegalize; 21495f757f3fSDimitry Andric } 21505f757f3fSDimitry Andric 21515f757f3fSDimitry Andric bool AArch64LegalizerInfo::legalizeDynStackAlloc( 21525f757f3fSDimitry Andric MachineInstr &MI, LegalizerHelper &Helper) const { 21535f757f3fSDimitry Andric MachineFunction &MF = *MI.getParent()->getParent(); 21545f757f3fSDimitry Andric MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; 21555f757f3fSDimitry Andric MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 21565f757f3fSDimitry Andric 21575f757f3fSDimitry Andric // If stack probing is not enabled for this function, use the default 21585f757f3fSDimitry Andric // lowering. 
21595f757f3fSDimitry Andric if (!MF.getFunction().hasFnAttribute("probe-stack") || 21605f757f3fSDimitry Andric MF.getFunction().getFnAttribute("probe-stack").getValueAsString() != 21615f757f3fSDimitry Andric "inline-asm") { 21625f757f3fSDimitry Andric Helper.lowerDynStackAlloc(MI); 21635f757f3fSDimitry Andric return true; 21645f757f3fSDimitry Andric } 21655f757f3fSDimitry Andric 21665f757f3fSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 21675f757f3fSDimitry Andric Register AllocSize = MI.getOperand(1).getReg(); 21685f757f3fSDimitry Andric Align Alignment = assumeAligned(MI.getOperand(2).getImm()); 21695f757f3fSDimitry Andric 21705f757f3fSDimitry Andric assert(MRI.getType(Dst) == LLT::pointer(0, 64) && 21715f757f3fSDimitry Andric "Unexpected type for dynamic alloca"); 21725f757f3fSDimitry Andric assert(MRI.getType(AllocSize) == LLT::scalar(64) && 21735f757f3fSDimitry Andric "Unexpected type for dynamic alloca"); 21745f757f3fSDimitry Andric 21755f757f3fSDimitry Andric LLT PtrTy = MRI.getType(Dst); 21765f757f3fSDimitry Andric Register SPReg = 21775f757f3fSDimitry Andric Helper.getTargetLowering().getStackPointerRegisterToSaveRestore(); 21785f757f3fSDimitry Andric Register SPTmp = 21795f757f3fSDimitry Andric Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy); 21805f757f3fSDimitry Andric auto NewMI = 21815f757f3fSDimitry Andric MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp}); 21825f757f3fSDimitry Andric MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass); 21835f757f3fSDimitry Andric MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI); 21845f757f3fSDimitry Andric MIRBuilder.buildCopy(Dst, SPTmp); 21855f757f3fSDimitry Andric 21865f757f3fSDimitry Andric MI.eraseFromParent(); 21875f757f3fSDimitry Andric return true; 21885f757f3fSDimitry Andric } 21895f757f3fSDimitry Andric 21905f757f3fSDimitry Andric bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI, 21915f757f3fSDimitry Andric LegalizerHelper &Helper) 
const { 21925f757f3fSDimitry Andric MachineIRBuilder &MIB = Helper.MIRBuilder; 21935f757f3fSDimitry Andric auto &AddrVal = MI.getOperand(0); 21945f757f3fSDimitry Andric 21955f757f3fSDimitry Andric int64_t IsWrite = MI.getOperand(1).getImm(); 21965f757f3fSDimitry Andric int64_t Locality = MI.getOperand(2).getImm(); 21975f757f3fSDimitry Andric int64_t IsData = MI.getOperand(3).getImm(); 21985f757f3fSDimitry Andric 21995f757f3fSDimitry Andric bool IsStream = Locality == 0; 22005f757f3fSDimitry Andric if (Locality != 0) { 22015f757f3fSDimitry Andric assert(Locality <= 3 && "Prefetch locality out-of-range"); 22025f757f3fSDimitry Andric // The locality degree is the opposite of the cache speed. 22035f757f3fSDimitry Andric // Put the number the other way around. 22045f757f3fSDimitry Andric // The encoding starts at 0 for level 1 22055f757f3fSDimitry Andric Locality = 3 - Locality; 22065f757f3fSDimitry Andric } 22075f757f3fSDimitry Andric 22085f757f3fSDimitry Andric unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream; 22095f757f3fSDimitry Andric 22105f757f3fSDimitry Andric MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal); 22115f757f3fSDimitry Andric MI.eraseFromParent(); 22125f757f3fSDimitry Andric return true; 22135f757f3fSDimitry Andric } 2214