xref: /openbsd-src/gnu/llvm/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
109467b48Spatrick //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This file implements the ARMSelectionDAGInfo class.
1009467b48Spatrick //
1109467b48Spatrick //===----------------------------------------------------------------------===//
1209467b48Spatrick 
1309467b48Spatrick #include "ARMTargetMachine.h"
1473471bf0Spatrick #include "ARMTargetTransformInfo.h"
1509467b48Spatrick #include "llvm/CodeGen/SelectionDAG.h"
1609467b48Spatrick #include "llvm/IR/DerivedTypes.h"
1773471bf0Spatrick #include "llvm/Support/CommandLine.h"
1809467b48Spatrick using namespace llvm;
1909467b48Spatrick 
2009467b48Spatrick #define DEBUG_TYPE "arm-selectiondag-info"
2109467b48Spatrick 
2273471bf0Spatrick cl::opt<TPLoop::MemTransfer> EnableMemtransferTPLoop(
2373471bf0Spatrick     "arm-memtransfer-tploop", cl::Hidden,
2473471bf0Spatrick     cl::desc("Control conversion of memcpy to "
2573471bf0Spatrick              "Tail predicated loops (WLSTP)"),
2673471bf0Spatrick     cl::init(TPLoop::ForceDisabled),
2773471bf0Spatrick     cl::values(clEnumValN(TPLoop::ForceDisabled, "force-disabled",
2873471bf0Spatrick                           "Don't convert memcpy to TP loop."),
2973471bf0Spatrick                clEnumValN(TPLoop::ForceEnabled, "force-enabled",
3073471bf0Spatrick                           "Always convert memcpy to TP loop."),
3173471bf0Spatrick                clEnumValN(TPLoop::Allow, "allow",
3273471bf0Spatrick                           "Allow (may be subject to certain conditions) "
3373471bf0Spatrick                           "conversion of memcpy to TP loop.")));
3473471bf0Spatrick 
3509467b48Spatrick // Emit, if possible, a specialized version of the given Libcall. Typically this
3609467b48Spatrick // means selecting the appropriately aligned version, but we also convert memset
3709467b48Spatrick // of 0 into memclr.
EmitSpecializedLibcall(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,RTLIB::Libcall LC) const3809467b48Spatrick SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
3909467b48Spatrick     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
4009467b48Spatrick     SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
4109467b48Spatrick   const ARMSubtarget &Subtarget =
4209467b48Spatrick       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
4309467b48Spatrick   const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
4409467b48Spatrick 
4509467b48Spatrick   // Only use a specialized AEABI function if the default version of this
4609467b48Spatrick   // Libcall is an AEABI function.
4709467b48Spatrick   if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
4809467b48Spatrick     return SDValue();
4909467b48Spatrick 
5009467b48Spatrick   // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
5109467b48Spatrick   // able to translate memset to memclr and use the value to index the function
5209467b48Spatrick   // name array.
5309467b48Spatrick   enum {
5409467b48Spatrick     AEABI_MEMCPY = 0,
5509467b48Spatrick     AEABI_MEMMOVE,
5609467b48Spatrick     AEABI_MEMSET,
5709467b48Spatrick     AEABI_MEMCLR
5809467b48Spatrick   } AEABILibcall;
5909467b48Spatrick   switch (LC) {
6009467b48Spatrick   case RTLIB::MEMCPY:
6109467b48Spatrick     AEABILibcall = AEABI_MEMCPY;
6209467b48Spatrick     break;
6309467b48Spatrick   case RTLIB::MEMMOVE:
6409467b48Spatrick     AEABILibcall = AEABI_MEMMOVE;
6509467b48Spatrick     break;
6609467b48Spatrick   case RTLIB::MEMSET:
6709467b48Spatrick     AEABILibcall = AEABI_MEMSET;
6809467b48Spatrick     if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
6909467b48Spatrick       if (ConstantSrc->getZExtValue() == 0)
7009467b48Spatrick         AEABILibcall = AEABI_MEMCLR;
7109467b48Spatrick     break;
7209467b48Spatrick   default:
7309467b48Spatrick     return SDValue();
7409467b48Spatrick   }
7509467b48Spatrick 
7609467b48Spatrick   // Choose the most-aligned libcall variant that we can
7709467b48Spatrick   enum {
7809467b48Spatrick     ALIGN1 = 0,
7909467b48Spatrick     ALIGN4,
8009467b48Spatrick     ALIGN8
8109467b48Spatrick   } AlignVariant;
8209467b48Spatrick   if ((Align & 7) == 0)
8309467b48Spatrick     AlignVariant = ALIGN8;
8409467b48Spatrick   else if ((Align & 3) == 0)
8509467b48Spatrick     AlignVariant = ALIGN4;
8609467b48Spatrick   else
8709467b48Spatrick     AlignVariant = ALIGN1;
8809467b48Spatrick 
8909467b48Spatrick   TargetLowering::ArgListTy Args;
9009467b48Spatrick   TargetLowering::ArgListEntry Entry;
9109467b48Spatrick   Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
9209467b48Spatrick   Entry.Node = Dst;
9309467b48Spatrick   Args.push_back(Entry);
9409467b48Spatrick   if (AEABILibcall == AEABI_MEMCLR) {
9509467b48Spatrick     Entry.Node = Size;
9609467b48Spatrick     Args.push_back(Entry);
9709467b48Spatrick   } else if (AEABILibcall == AEABI_MEMSET) {
9809467b48Spatrick     // Adjust parameters for memset, EABI uses format (ptr, size, value),
9909467b48Spatrick     // GNU library uses (ptr, value, size)
10009467b48Spatrick     // See RTABI section 4.3.4
10109467b48Spatrick     Entry.Node = Size;
10209467b48Spatrick     Args.push_back(Entry);
10309467b48Spatrick 
10409467b48Spatrick     // Extend or truncate the argument to be an i32 value for the call.
10509467b48Spatrick     if (Src.getValueType().bitsGT(MVT::i32))
10609467b48Spatrick       Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
10709467b48Spatrick     else if (Src.getValueType().bitsLT(MVT::i32))
10809467b48Spatrick       Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
10909467b48Spatrick 
11009467b48Spatrick     Entry.Node = Src;
11109467b48Spatrick     Entry.Ty = Type::getInt32Ty(*DAG.getContext());
11209467b48Spatrick     Entry.IsSExt = false;
11309467b48Spatrick     Args.push_back(Entry);
11409467b48Spatrick   } else {
11509467b48Spatrick     Entry.Node = Src;
11609467b48Spatrick     Args.push_back(Entry);
11709467b48Spatrick 
11809467b48Spatrick     Entry.Node = Size;
11909467b48Spatrick     Args.push_back(Entry);
12009467b48Spatrick   }
12109467b48Spatrick 
12209467b48Spatrick   char const *FunctionNames[4][3] = {
12309467b48Spatrick     { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
12409467b48Spatrick     { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
12509467b48Spatrick     { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
12609467b48Spatrick     { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
12709467b48Spatrick   };
12809467b48Spatrick   TargetLowering::CallLoweringInfo CLI(DAG);
12909467b48Spatrick   CLI.setDebugLoc(dl)
13009467b48Spatrick       .setChain(Chain)
13109467b48Spatrick       .setLibCallee(
13209467b48Spatrick           TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
13309467b48Spatrick           DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
13409467b48Spatrick                                 TLI->getPointerTy(DAG.getDataLayout())),
13509467b48Spatrick           std::move(Args))
13609467b48Spatrick       .setDiscardResult();
13709467b48Spatrick   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
13809467b48Spatrick 
13909467b48Spatrick   return CallResult.second;
14009467b48Spatrick }
14109467b48Spatrick 
shouldGenerateInlineTPLoop(const ARMSubtarget & Subtarget,const SelectionDAG & DAG,ConstantSDNode * ConstantSize,Align Alignment,bool IsMemcpy)14273471bf0Spatrick static bool shouldGenerateInlineTPLoop(const ARMSubtarget &Subtarget,
14373471bf0Spatrick                                        const SelectionDAG &DAG,
14473471bf0Spatrick                                        ConstantSDNode *ConstantSize,
14573471bf0Spatrick                                        Align Alignment, bool IsMemcpy) {
14673471bf0Spatrick   auto &F = DAG.getMachineFunction().getFunction();
14773471bf0Spatrick   if (!EnableMemtransferTPLoop)
14873471bf0Spatrick     return false;
14973471bf0Spatrick   if (EnableMemtransferTPLoop == TPLoop::ForceEnabled)
15073471bf0Spatrick     return true;
15173471bf0Spatrick   // Do not generate inline TP loop if optimizations is disabled,
15273471bf0Spatrick   // or if optimization for size (-Os or -Oz) is on.
15373471bf0Spatrick   if (F.hasOptNone() || F.hasOptSize())
15473471bf0Spatrick     return false;
15573471bf0Spatrick   // If cli option is unset, for memset always generate inline TP.
15673471bf0Spatrick   // For memcpy, check some conditions
15773471bf0Spatrick   if (!IsMemcpy)
15873471bf0Spatrick     return true;
15973471bf0Spatrick   if (!ConstantSize && Alignment >= Align(4))
16073471bf0Spatrick     return true;
16173471bf0Spatrick   if (ConstantSize &&
16273471bf0Spatrick       ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold() &&
16373471bf0Spatrick       ConstantSize->getZExtValue() <
16473471bf0Spatrick           Subtarget.getMaxMemcpyTPInlineSizeThreshold())
16573471bf0Spatrick     return true;
16673471bf0Spatrick   return false;
16773471bf0Spatrick }
16873471bf0Spatrick 
EmitTargetCodeForMemcpy(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,Align Alignment,bool isVolatile,bool AlwaysInline,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const16909467b48Spatrick SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
17009467b48Spatrick     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
171097a140dSpatrick     SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
17209467b48Spatrick     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
17309467b48Spatrick   const ARMSubtarget &Subtarget =
17409467b48Spatrick       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
17573471bf0Spatrick   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
17673471bf0Spatrick 
17773471bf0Spatrick   if (Subtarget.hasMVEIntegerOps() &&
17873471bf0Spatrick       shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment, true))
17973471bf0Spatrick     return DAG.getNode(ARMISD::MEMCPYLOOP, dl, MVT::Other, Chain, Dst, Src,
18073471bf0Spatrick                        DAG.getZExtOrTrunc(Size, dl, MVT::i32));
18173471bf0Spatrick 
18209467b48Spatrick   // Do repeated 4-byte loads and stores. To be improved.
18309467b48Spatrick   // This requires 4-byte alignment.
184097a140dSpatrick   if (Alignment < Align(4))
18509467b48Spatrick     return SDValue();
18609467b48Spatrick   // This requires the copy size to be a constant, preferably
18709467b48Spatrick   // within a subtarget-specific limit.
18809467b48Spatrick   if (!ConstantSize)
189097a140dSpatrick     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
190097a140dSpatrick                                   Alignment.value(), RTLIB::MEMCPY);
19109467b48Spatrick   uint64_t SizeVal = ConstantSize->getZExtValue();
19209467b48Spatrick   if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
193097a140dSpatrick     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
194097a140dSpatrick                                   Alignment.value(), RTLIB::MEMCPY);
19509467b48Spatrick 
19609467b48Spatrick   unsigned BytesLeft = SizeVal & 3;
19709467b48Spatrick   unsigned NumMemOps = SizeVal >> 2;
19809467b48Spatrick   unsigned EmittedNumMemOps = 0;
19909467b48Spatrick   EVT VT = MVT::i32;
20009467b48Spatrick   unsigned VTSize = 4;
20109467b48Spatrick   unsigned i = 0;
20209467b48Spatrick   // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
20309467b48Spatrick   const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
20409467b48Spatrick   SDValue TFOps[6];
20509467b48Spatrick   SDValue Loads[6];
20609467b48Spatrick   uint64_t SrcOff = 0, DstOff = 0;
20709467b48Spatrick 
20809467b48Spatrick   // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
20909467b48Spatrick   // VLDM/VSTM and make this code emit it when appropriate. This would reduce
21009467b48Spatrick   // pressure on the general purpose registers. However this seems harder to map
21109467b48Spatrick   // onto the register allocator's view of the world.
21209467b48Spatrick 
21309467b48Spatrick   // The number of MEMCPY pseudo-instructions to emit. We use up to
21409467b48Spatrick   // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
21509467b48Spatrick   // later on. This is a lower bound on the number of MEMCPY operations we must
21609467b48Spatrick   // emit.
21709467b48Spatrick   unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
21809467b48Spatrick 
21909467b48Spatrick   // Code size optimisation: do not inline memcpy if expansion results in
22009467b48Spatrick   // more instructions than the libary call.
22109467b48Spatrick   if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
22209467b48Spatrick     return SDValue();
22309467b48Spatrick   }
22409467b48Spatrick 
22509467b48Spatrick   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
22609467b48Spatrick 
22709467b48Spatrick   for (unsigned I = 0; I != NumMEMCPYs; ++I) {
22809467b48Spatrick     // Evenly distribute registers among MEMCPY operations to reduce register
22909467b48Spatrick     // pressure.
23009467b48Spatrick     unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
23109467b48Spatrick     unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
23209467b48Spatrick 
23309467b48Spatrick     Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
23409467b48Spatrick                       DAG.getConstant(NumRegs, dl, MVT::i32));
23509467b48Spatrick     Src = Dst.getValue(1);
23609467b48Spatrick     Chain = Dst.getValue(2);
23709467b48Spatrick 
23809467b48Spatrick     DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
23909467b48Spatrick     SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
24009467b48Spatrick 
24109467b48Spatrick     EmittedNumMemOps = NextEmittedNumMemOps;
24209467b48Spatrick   }
24309467b48Spatrick 
24409467b48Spatrick   if (BytesLeft == 0)
24509467b48Spatrick     return Chain;
24609467b48Spatrick 
24709467b48Spatrick   // Issue loads / stores for the trailing (1 - 3) bytes.
24809467b48Spatrick   auto getRemainingValueType = [](unsigned BytesLeft) {
24909467b48Spatrick     return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
25009467b48Spatrick   };
25109467b48Spatrick   auto getRemainingSize = [](unsigned BytesLeft) {
25209467b48Spatrick     return (BytesLeft >= 2) ? 2 : 1;
25309467b48Spatrick   };
25409467b48Spatrick 
25509467b48Spatrick   unsigned BytesLeftSave = BytesLeft;
25609467b48Spatrick   i = 0;
25709467b48Spatrick   while (BytesLeft) {
25809467b48Spatrick     VT = getRemainingValueType(BytesLeft);
25909467b48Spatrick     VTSize = getRemainingSize(BytesLeft);
26009467b48Spatrick     Loads[i] = DAG.getLoad(VT, dl, Chain,
26109467b48Spatrick                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
26209467b48Spatrick                                        DAG.getConstant(SrcOff, dl, MVT::i32)),
26309467b48Spatrick                            SrcPtrInfo.getWithOffset(SrcOff));
26409467b48Spatrick     TFOps[i] = Loads[i].getValue(1);
26509467b48Spatrick     ++i;
26609467b48Spatrick     SrcOff += VTSize;
26709467b48Spatrick     BytesLeft -= VTSize;
26809467b48Spatrick   }
269*d415bd75Srobert   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
27009467b48Spatrick 
27109467b48Spatrick   i = 0;
27209467b48Spatrick   BytesLeft = BytesLeftSave;
27309467b48Spatrick   while (BytesLeft) {
27409467b48Spatrick     VT = getRemainingValueType(BytesLeft);
27509467b48Spatrick     VTSize = getRemainingSize(BytesLeft);
27609467b48Spatrick     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
27709467b48Spatrick                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
27809467b48Spatrick                                         DAG.getConstant(DstOff, dl, MVT::i32)),
27909467b48Spatrick                             DstPtrInfo.getWithOffset(DstOff));
28009467b48Spatrick     ++i;
28109467b48Spatrick     DstOff += VTSize;
28209467b48Spatrick     BytesLeft -= VTSize;
28309467b48Spatrick   }
284*d415bd75Srobert   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ArrayRef(TFOps, i));
28509467b48Spatrick }
28609467b48Spatrick 
EmitTargetCodeForMemmove(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,Align Alignment,bool isVolatile,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const28709467b48Spatrick SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
28809467b48Spatrick     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
289097a140dSpatrick     SDValue Size, Align Alignment, bool isVolatile,
29009467b48Spatrick     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
291097a140dSpatrick   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
292097a140dSpatrick                                 Alignment.value(), RTLIB::MEMMOVE);
29309467b48Spatrick }
29409467b48Spatrick 
EmitTargetCodeForMemset(SelectionDAG & DAG,const SDLoc & dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,Align Alignment,bool isVolatile,bool AlwaysInline,MachinePointerInfo DstPtrInfo) const29509467b48Spatrick SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
29609467b48Spatrick     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
297*d415bd75Srobert     SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
29809467b48Spatrick     MachinePointerInfo DstPtrInfo) const {
29973471bf0Spatrick 
30073471bf0Spatrick   const ARMSubtarget &Subtarget =
30173471bf0Spatrick       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
30273471bf0Spatrick 
30373471bf0Spatrick   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
30473471bf0Spatrick 
30573471bf0Spatrick   // Generate TP loop for llvm.memset
30673471bf0Spatrick   if (Subtarget.hasMVEIntegerOps() &&
30773471bf0Spatrick       shouldGenerateInlineTPLoop(Subtarget, DAG, ConstantSize, Alignment,
30873471bf0Spatrick                                  false)) {
30973471bf0Spatrick     Src = DAG.getSplatBuildVector(MVT::v16i8, dl,
31073471bf0Spatrick                                   DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src));
31173471bf0Spatrick     return DAG.getNode(ARMISD::MEMSETLOOP, dl, MVT::Other, Chain, Dst, Src,
31273471bf0Spatrick                        DAG.getZExtOrTrunc(Size, dl, MVT::i32));
31373471bf0Spatrick   }
31473471bf0Spatrick 
315*d415bd75Srobert   if (!AlwaysInline)
316097a140dSpatrick     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
317097a140dSpatrick                                   Alignment.value(), RTLIB::MEMSET);
318*d415bd75Srobert 
319*d415bd75Srobert   return SDValue();
32009467b48Spatrick }
321