1 //===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the X86SelectionDAGInfo class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "X86SelectionDAGInfo.h" 14 #include "X86ISelLowering.h" 15 #include "X86InstrInfo.h" 16 #include "X86RegisterInfo.h" 17 #include "X86Subtarget.h" 18 #include "llvm/CodeGen/SelectionDAG.h" 19 #include "llvm/CodeGen/TargetLowering.h" 20 #include "llvm/IR/DerivedTypes.h" 21 22 using namespace llvm; 23 24 #define DEBUG_TYPE "x86-selectiondag-info" 25 26 bool X86SelectionDAGInfo::isBaseRegConflictPossible( 27 SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const { 28 // We cannot use TRI->hasBasePointer() until *after* we select all basic 29 // blocks. Legalization may introduce new stack temporaries with large 30 // alignment requirements. Fall back to generic code if there are any 31 // dynamic stack adjustments (hopefully rare) and the base pointer would 32 // conflict if we had to use it. 33 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 34 if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment()) 35 return false; 36 37 const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>( 38 DAG.getSubtarget().getRegisterInfo()); 39 unsigned BaseReg = TRI->getBaseRegister(); 40 for (unsigned R : ClobberSet) 41 if (BaseReg == R) 42 return true; 43 return false; 44 } 45 46 namespace { 47 48 // Represents a cover of a buffer of Size bytes with Count() blocks of type AVT 49 // (of size UBytes() bytes), as well as how many bytes remain (BytesLeft() is 50 // always smaller than the block size). 
struct RepMovsRepeats {
  RepMovsRepeats(uint64_t Size) : Size(Size) {}

  // Number of whole AVT-sized blocks that cover the buffer.
  uint64_t Count() const { return Size / UBytes(); }
  // Tail bytes not covered by whole blocks; always < UBytes().
  uint64_t BytesLeft() const { return Size % UBytes(); }
  // Block size in bytes, derived from the element type AVT.
  uint64_t UBytes() const { return AVT.getSizeInBits() / 8; }

  const uint64_t Size; // Total buffer size in bytes.
  MVT AVT = MVT::i8;   // Element type used by the rep-string expansion.
};

} // namespace

/// Lower a memset to an inline "rep stos" idiom when profitable.
///
/// Bails out (returns an empty SDValue, deferring to generic lowering) for
/// segment-relative destinations (address space >= 256) and, unless a
/// specialized __bzero libcall applies, for non-DWORD-aligned, non-constant,
/// or over-threshold sizes. Otherwise the fill value is splatted into
/// AL/AX/EAX/RAX, the element count into ECX/RCX and the destination into
/// EDI/RDI, and an X86ISD::REP_STOS node consumes them; any 1-7 tail bytes
/// left over by a wide element are finished with a recursive DAG.getMemset.
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
    SDValue Size, unsigned Align, bool isVolatile,
    MachinePointerInfo DstPtrInfo) const {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

#ifndef NDEBUG
  // If the base register might conflict with our physical registers, bail out.
  // Debug-only sanity check: unlike memcpy below, memset asserts rather than
  // falling back, so release builds do not pay for the scan.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
                                  X86::ECX, X86::EAX, X86::EDI};
  assert(!isBaseRegConflictPossible(DAG, ClobberSet));
#endif

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If not DWORD aligned or size is more than the threshold, call the library.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if ((Align & 3) != 0 || !ConstantSize ||
      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
    // Check to see if there is a specialized entry-point for memory zeroing
    // (__bzero); only usable when the stored value is the constant zero.
    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);

    if (const char *bzeroName = (ValC && ValC->isNullValue())
                                    ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
                                    : nullptr) {
      // Emit a direct call to __bzero(dst, size); its result is discarded and
      // only the call's chain is returned.
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
      Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
      TargetLowering::ArgListTy Args;
      TargetLowering::ArgListEntry Entry;
      Entry.Node = Dst;
      Entry.Ty = IntPtrTy;
      Args.push_back(Entry);
      Entry.Node = Size;
      Args.push_back(Entry);

      TargetLowering::CallLoweringInfo CLI(DAG);
      CLI.setDebugLoc(dl)
          .setChain(Chain)
          .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                        DAG.getExternalSymbol(bzeroName, IntPtr),
                        std::move(Args))
          .setDiscardResult();

      std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
      return CallResult.second;
    }

    // Otherwise have the target-independent code call memset.
    return SDValue();
  }

  uint64_t SizeVal = ConstantSize->getZExtValue();
  SDValue InFlag;
  EVT AVT;       // Element type for the rep-string store.
  SDValue Count; // Element count placed in (E/R)CX.
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
  unsigned BytesLeft = 0;
  if (ValC) {
    unsigned ValReg;
    // NB: this local shadows the SDValue parameter 'Val' for the rest of the
    // 'if' body; it holds the low byte of the constant fill value.
    uint64_t Val = ValC->getZExtValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    // Pick the widest element the alignment allows and splat the byte into it.
    switch (Align & 3) {
    case 2: // WORD aligned
      AVT = MVT::i16;
      ValReg = X86::AX;
      Val = (Val << 8) | Val;
      break;
    case 0: // DWORD aligned
      AVT = MVT::i32;
      ValReg = X86::EAX;
      Val = (Val << 8) | Val;
      Val = (Val << 16) | Val;
      if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
        AVT = MVT::i64;
        ValReg = X86::RAX;
        Val = (Val << 32) | Val;
      }
      break;
    default: // Byte aligned
      AVT = MVT::i8;
      ValReg = X86::AL;
      Count = DAG.getIntPtrConstant(SizeVal, dl);
      break;
    }

    if (AVT.bitsGT(MVT::i8)) {
      // Wide elements: count whole elements, remember the tail for later.
      unsigned UBytes = AVT.getSizeInBits() / 8;
      Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
      BytesLeft = SizeVal % UBytes;
    }

    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
                             InFlag);
    InFlag = Chain.getValue(1);
  } else {
    // Non-constant value: can only store one byte at a time through AL.
    AVT = MVT::i8;
    Count = DAG.getIntPtrConstant(SizeVal, dl);
    Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Thread the glue (InFlag) through the count and destination copies so the
  // scheduler keeps them adjacent to the REP_STOS node that consumes the
  // registers.
  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);

  if (BytesLeft) {
    // Handle the last 1 - 7 bytes with a (narrower) recursive memset at
    // Dst + Offset; generic lowering will expand it to plain stores.
    unsigned Offset = SizeVal - BytesLeft;
    EVT AddrVT = Dst.getValueType();
    EVT SizeVT = Size.getValueType();

    Chain = DAG.getMemset(Chain, dl,
                          DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                      DAG.getConstant(Offset, dl, AddrVT)),
                          Val,
                          DAG.getConstant(BytesLeft, dl, SizeVT),
                          Align, isVolatile, false,
                          DstPtrInfo.getWithOffset(Offset));
  }

  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
  return Chain;
}

/// Lower a memcpy to an inline "rep movs" idiom when profitable.
///
/// Bails out (returns an empty SDValue, deferring to generic lowering or a
/// libcall) for non-constant sizes, sizes over the subtarget threshold or
/// misaligned copies (unless AlwaysInline forces inlining), segment-relative
/// address spaces, and possible base-register conflicts. Otherwise the count,
/// destination and source are pinned to (E/R)CX, (E/R)DI and (E/R)SI and an
/// X86ISD::REP_MOVS node is emitted; any tail bytes left by a wide element
/// size are finished with a recursive DAG.getMemcpy, and all pieces are
/// joined by a TokenFactor.
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();
  if (!ConstantSize)
    return SDValue();
  RepMovsRepeats Repeats(ConstantSize->getZExtValue());
  if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold())
    return SDValue();

  // If not DWORD aligned, it is more efficient to call the library. However
  // if calling the library is not allowed (AlwaysInline), then soldier on as
  // the code generated here is better than the long load-store sequence we
  // would otherwise get.
  if (!AlwaysInline && (Align & 3) != 0)
    return SDValue();

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 ||
      SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If the base register might conflict with our physical registers, bail out.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
                                  X86::ECX, X86::ESI, X86::EDI};
  if (isBaseRegConflictPossible(DAG, ClobberSet))
    return SDValue();

  // If the target has enhanced REPMOVSB, then it's at least as fast to use
  // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
  // BytesLeft. Otherwise pick the widest element the alignment allows:
  // Align is a power of two, so exactly one of the bit tests below fires.
  if (!Subtarget.hasERMSB() && !(Align & 1)) {
    if (Align & 2)
      // WORD aligned
      Repeats.AVT = MVT::i16;
    else if (Align & 4)
      // DWORD aligned
      Repeats.AVT = MVT::i32;
    else
      // QWORD aligned
      Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;

    if (Repeats.BytesLeft() > 0 &&
        DAG.getMachineFunction().getFunction().optForMinSize()) {
      // When aggressively optimizing for size, avoid generating the code to
      // handle BytesLeft.
      Repeats.AVT = MVT::i8;
    }
  }

  // Glue the three register copies (count, destination, source) to the
  // REP_MOVS node that consumes them, matching the x86 string-move register
  // protocol.
  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  SDValue InFlag;
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
                           DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RSI : X86::ESI,
                           Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (Repeats.BytesLeft()) {
    // Handle the last 1 - 7 bytes with a recursive memcpy of the tail at
    // Dst/Src + Offset; generic lowering will expand it to plain load/stores.
    unsigned Offset = Repeats.Size - Repeats.BytesLeft();
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset, dl,
                                                                DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset, dl,
                                                                SrcVT)),
                                    DAG.getConstant(Repeats.BytesLeft(), dl,
                                                    SizeVT),
                                    Align, isVolatile, AlwaysInline, false,
                                    DstPtrInfo.getWithOffset(Offset),
                                    SrcPtrInfo.getWithOffset(Offset)));
  }

  // Join the rep-movs chain and the optional tail copy into a single token.
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}