//===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that BPF uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "BPFISelLowering.h"
#include "BPF.h"
#include "BPFSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "bpf-lower"

static cl::opt<bool> BPFExpandMemcpyInOrder(
    "bpf-expand-memcpy-in-order", cl::Hidden, cl::init(false),
    cl::desc("Expand memcpy into load/store pairs in order"));

static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
                 SDValue Val = {}) {
  std::string Str;
  if (Val) {
    raw_string_ostream OS(Str);
    Val->print(OS);
    OS << ' ';
  }
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
      MF.getFunction(), Twine(Str).concat(Msg), DL.getDebugLoc()));
}

BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
                                     const BPFSubtarget &STI)
    : TargetLowering(TM) {

  // Set up the register classes.
  addRegisterClass(MVT::i64, &BPF::GPRRegClass);
  if (STI.getHasAlu32())
    addRegisterClass(MVT::i32, &BPF::GPR32RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(BPF::R11);

  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // Mark unsupported atomic operations as Custom so we can emit better error
  // messages than the fatal errors SelectionDAG would otherwise produce.
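  // With alu32, all of these 32-bit atomics are supported natively, so i32 is
  // skipped below. Otherwise the fetch-and-op, swap and compare-and-swap
  // forms go through Custom lowering purely to be diagnosed; atomic add is
  // only diagnosed for i8/i16, since its 32- and 64-bit forms always exist.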
  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
    if (VT == MVT::i32) {
      if (STI.getHasAlu32())
        continue;
    } else {
      setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
    }

    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
  }

  for (auto VT : {MVT::i32, MVT::i64}) {
    if (VT == MVT::i32 && !STI.getHasAlu32())
      continue;

    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    if (!STI.hasSdivSmod()) {
      setOperationAction(ISD::SDIV, VT, Custom);
      setOperationAction(ISD::SREM, VT, Custom);
    }
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::SHL_PARTS, VT, Expand);
    setOperationAction(ISD::SRL_PARTS, VT, Expand);
    setOperationAction(ISD::SRA_PARTS, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);

    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Custom);
  }

  if (STI.getHasAlu32()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Promote);
    setOperationAction(ISD::BR_CC, MVT::i32,
                       STI.getHasJmp32() ? Custom : Promote);
  }

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!STI.hasMovsx()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  }

  // Extended load operations for i1 types must be promoted.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

    if (!STI.hasLdsx()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
    }
  }

  setBooleanContents(ZeroOrOneBooleanContent);
  setMaxAtomicSizeInBitsSupported(64);

  // Function alignments
  setMinFunctionAlignment(Align(8));
  setPrefFunctionAlignment(Align(8));

  if (BPFExpandMemcpyInOrder) {
    // LLVM generic code will try to expand memcpy into load/store pairs at
    // this stage, which is before quite a few IR optimization passes. The
    // loads and stores could therefore be moved apart from each other, which
    // would confuse the memcpy pattern matchers inside kernel eBPF JIT
    // compilers.
    //
    // When -bpf-expand-memcpy-in-order is specified, we want to defer the
    // expansion of memcpy to a later stage in the IR optimization pipeline so
    // those load/store pairs won't be touched and can be kept in order.
    // Hence, we set MaxStoresPerMem* to zero to disable the generic
    // getMemcpyLoadsAndStores code path, and ask LLVM to use the target
    // expander EmitTargetCodeForMemcpy.
    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
    MaxLoadsPerMemcmp = 0;
  } else {
    // Inline memcpy() so that the kernel can see the explicit copies.
    unsigned CommonMaxStores =
        STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();

    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
    MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
  }

  // CPU/Feature control
  HasAlu32 = STI.getHasAlu32();
  HasJmp32 = STI.getHasJmp32();
  HasJmpExt = STI.getHasJmpExt();
  HasMovsx = STI.hasMovsx();
}

bool BPFTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  return false;
}

bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
  if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
  if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  EVT VT1 = Val.getValueType();
  if (Val.getOpcode() == ISD::LOAD && VT1.isSimple() && VT2.isSimple()) {
    MVT MT1 = VT1.getSimpleVT().SimpleTy;
    MVT MT2 = VT2.getSimpleVT().SimpleTy;
    if ((MT1 == MVT::i8 || MT1 == MVT::i16 || MT1 == MVT::i32) &&
        (MT2 == MVT::i32 || MT2 == MVT::i64))
      return true;
  }
  return TargetLoweringBase::isZExtFree(Val, VT2);
}

BPFTargetLowering::ConstraintType
BPFTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'w':
      return C_RegisterClass;
    }
  }

  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    case 'r': // GENERAL_REGS
      return std::make_pair(0U, &BPF::GPRRegClass);
    case 'w':
      if (HasAlu32)
        return std::make_pair(0U, &BPF::GPR32RegClass);
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void BPFTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  const char *Msg;
  uint32_t Opcode = N->getOpcode();
  switch (Opcode) {
  default:
    report_fatal_error("unhandled custom legalization: " + Twine(Opcode));
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
      Msg = "unsupported atomic operation, please use 32/64 bit version";
    else
      Msg = "unsupported atomic operation, please use 64 bit version";
    break;
  }

  SDLoc DL(N);
  // We'll still produce a fatal error downstream, but this diagnostic is more
  // user-friendly.
  fail(DL, DAG, Msg);
}

SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::SDIV:
  case ISD::SREM:
    return LowerSDIVSREM(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG);
  }
}

// Calling Convention Implementation
#include "BPFGenCallingConv.inc"

SDValue BPFTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  switch (CallConv) {
  default:
    report_fatal_error("unimplemented calling convention: " + Twine(CallConv));
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  bool HasMemArgs = false;
  for (size_t I = 0; I < ArgLocs.size(); ++I) {
    auto &VA = ArgLocs[I];

    if (VA.isRegLoc()) {
      // Arguments passed in registers.
      EVT RegVT = VA.getLocVT();
      MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
      switch (SimpleTy) {
      default: {
        std::string Str;
        {
          raw_string_ostream OS(Str);
          RegVT.print(OS);
        }
        report_fatal_error("unhandled argument type: " + Twine(Str));
      }
      case MVT::i32:
      case MVT::i64:
        Register VReg = RegInfo.createVirtualRegister(
            SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
        RegInfo.addLiveIn(VA.getLocReg(), VReg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);

        // If this is a value that has been promoted to a wider type, insert
        // an assert[sz]ext to capture this, then truncate to the right size.
        if (VA.getLocInfo() == CCValAssign::SExt)
          ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));
        else if (VA.getLocInfo() == CCValAssign::ZExt)
          ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));

        if (VA.getLocInfo() != CCValAssign::Full)
          ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);

        InVals.push_back(ArgValue);

        break;
      }
    } else {
      if (VA.isMemLoc())
        HasMemArgs = true;
      else
        report_fatal_error("unhandled argument location");
      InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT()));
    }
  }
  if (HasMemArgs)
    fail(DL, DAG, "stack arguments are not supported");
  if (IsVarArg)
    fail(DL, DAG, "variadic functions are not supported");
  if (MF.getFunction().hasStructRetAttr())
    fail(DL, DAG, "aggregate returns are not supported");

  return Chain;
}

const size_t BPFTargetLowering::MaxArgs = 5;

static void resetRegMaskBit(const TargetRegisterInfo *TRI, uint32_t *RegMask,
                            MCRegister Reg) {
  for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
    RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
}

static uint32_t *regMaskFromTemplate(const TargetRegisterInfo *TRI,
                                     MachineFunction &MF,
                                     const uint32_t *BaseRegMask) {
  uint32_t *RegMask = MF.allocateRegMask();
  unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
  memcpy(RegMask, BaseRegMask, sizeof(RegMask[0]) * RegMaskSize);
  return RegMask;
}

SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  auto &Outs = CLI.Outs;
  auto &OutVals = CLI.OutVals;
  auto &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();

  // BPF target does not support tail call optimization.
  IsTailCall = false;

  switch (CallConv) {
  default:
    report_fatal_error("unsupported calling convention: " + Twine(CallConv));
  case CallingConv::Fast:
  case CallingConv::C:
    break;
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  unsigned NumBytes = CCInfo.getStackSize();

  if (Outs.size() > MaxArgs)
    fail(CLI.DL, DAG, "too many arguments", Callee);

  for (auto &Arg : Outs) {
    ISD::ArgFlagsTy Flags = Arg.Flags;
    if (!Flags.isByVal())
      continue;
    fail(CLI.DL, DAG, "pass by value not supported", Callee);
    break;
  }

  auto PtrVT = getPointerTy(MF.getDataLayout());
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  SmallVector<std::pair<unsigned, SDValue>, MaxArgs> RegsToPass;

  // Walk arg assignments
  for (size_t i = 0; i < std::min(ArgLocs.size(), MaxArgs); ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue &Arg = OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("unhandled location info: " + Twine(VA.getLocInfo()));
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    }

    // Push arguments into the RegsToPass vector.
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    else
      report_fatal_error("stack arguments are not supported");
  }

  SDValue InGlue;

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and glue operands which copy the outgoing args into registers. The InGlue
  // is necessary since all emitted instructions must be stuck together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
                                        G->getOffset(), 0);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
    fail(CLI.DL, DAG,
         Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
               "' is not supported."));
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  bool HasFastCall =
      (CLI.CB && isa<CallInst>(CLI.CB) && CLI.CB->hasFnAttr("bpf_fastcall"));
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  if (HasFastCall) {
    uint32_t *RegMask = regMaskFromTemplate(
        TRI, MF, TRI->getCallPreservedMask(MF, CallingConv::PreserveAll));
    for (auto const &RegPair : RegsToPass)
      resetRegMaskBit(TRI, RegMask, RegPair.first);
    if (!CLI.CB->getType()->isVoidTy())
      resetRegMaskBit(TRI, RegMask, BPF::R0);
    Ops.push_back(DAG.getRegisterMask(RegMask));
  } else {
    Ops.push_back(
        DAG.getRegisterMask(TRI->getCallPreservedMask(MF, CLI.CallConv)));
  }

  if (InGlue.getNode())
    Ops.push_back(InGlue);

  Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, CLI.DL);
  InGlue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, CLI.DL, DAG,
                         InVals);
}

SDValue
BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &DL, SelectionDAG &DAG) const {
  unsigned Opc = BPFISD::RET_GLUE;

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;
  MachineFunction &MF = DAG.getMachineFunction();

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (MF.getFunction().getReturnType()->isAggregateType()) {
    fail(DL, DAG, "aggregate returns are not supported");
    return DAG.getNode(Opc, DL, MVT::Other, Chain);
  }

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (size_t i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    if (!VA.isRegLoc())
      report_fatal_error("stack return values are not supported");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);

    // Guarantee that all emitted copies are stuck together, so they are not
    // scheduled apart from each other.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}

SDValue BPFTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (Ins.size() > 1) {
    fail(DL, DAG, "only small returns supported");
    for (auto &In : Ins)
      InVals.push_back(DAG.getConstant(0, DL, In.VT));
    return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InGlue).getValue(1);
  }

  CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  // Copy all of the result registers out of their specified physregs.
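  // With a glue input, each CopyFromReg node produces three results: the
  // copied value (result 0), the chain (result 1) and the output glue
  // (result 2). Thread the chain and glue through so the copies stay ordered.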
  for (auto &Val : RVLocs) {
    Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
                               Val.getValVT(), InGlue).getValue(1);
    InGlue = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}

// Without the jmp-ext feature, only the >, >=, ==, != style conditional jumps
// exist, so rewrite <, <= comparisons into their swapped-operand equivalents.
static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETLT:
  case ISD::SETLE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue BPFTargetLowering::LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  fail(DL, DAG,
       "unsupported signed division, please convert to unsigned div/mod.");
  return DAG.getUNDEF(Op->getValueType(0));
}

SDValue BPFTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  fail(DL, DAG, "unsupported dynamic stack allocation");
  auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
  return DAG.getMergeValues(Ops, SDLoc());
}

SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
                     DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
}

SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TrueV = Op.getOperand(2);
  SDValue FalseV = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};

  return DAG.getNode(BPFISD::SELECT_CC, DL, Op.getValueType(), Ops);
}

const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((BPFISD::NodeType)Opcode) {
  case BPFISD::FIRST_NUMBER:
    break;
  case BPFISD::RET_GLUE:
    return "BPFISD::RET_GLUE";
  case BPFISD::CALL:
    return "BPFISD::CALL";
  case BPFISD::SELECT_CC:
    return "BPFISD::SELECT_CC";
  case BPFISD::BR_CC:
    return "BPFISD::BR_CC";
  case BPFISD::Wrapper:
    return "BPFISD::Wrapper";
  case BPFISD::MEMCPY:
    return "BPFISD::MEMCPY";
  }
  return nullptr;
}

static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

template <class NodeTy>
SDValue BPFTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                   unsigned Flags) const {
  SDLoc DL(N);

  SDValue GA = getTargetNode(N, DL, MVT::i64, DAG, Flags);

  return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
}

SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
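  // Offset folding is disabled for BPF (see isOffsetFoldingLegal above), so a
  // global address is not expected to carry a non-zero offset here; reject it
  // if it does, then wrap the bare symbol in a BPFISD::Wrapper node.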
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  if (N->getOffset() != 0)
    report_fatal_error("invalid offset for global address: " +
                       Twine(N->getOffset()));
  return getAddr(N, DAG);
}

SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}

unsigned
BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Reg, bool isSigned) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
  int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
  MachineFunction *F = BB->getParent();
  DebugLoc DL = MI.getDebugLoc();

  MachineRegisterInfo &RegInfo = F->getRegInfo();

  if (!isSigned) {
    Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    return PromotedReg0;
  }
  Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
  if (HasMovsx) {
    BuildMI(BB, DL, TII.get(BPF::MOVSX_rr_32), PromotedReg0).addReg(Reg);
    return PromotedReg0;
  } else {
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
        .addReg(PromotedReg0).addImm(32);
    BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
        .addReg(PromotedReg1).addImm(32);
  }

  return PromotedReg2;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
                                                     MachineBasicBlock *BB)
                                                     const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineInstrBuilder MIB(*MF, MI);
  unsigned ScratchReg;

  // This function does custom insertion while lowering BPFISD::MEMCPY, which
  // only has two register operands from the memcpy semantics: the copy source
  // address and the copy destination address.
  //
  // Because we will expand BPFISD::MEMCPY into load/store pairs, we will need
  // a third scratch register to serve as the destination register of the load
  // and the source register of the store.
  //
  // The scratch register here carries the Define | Dead | EarlyClobber flags.
  // The EarlyClobber flag has the semantic property that the operand it is
  // attached to is clobbered before the rest of the inputs are read. Hence it
  // must be unique among the operands to the instruction. The Define flag is
  // needed to convince the machine verifier that an undef value isn't a
  // problem, as we are loading from memory into it anyway. The Dead flag is
  // needed because the value in the scratch register isn't supposed to be
  // used by any other instruction.
  ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
  MIB.addReg(ScratchReg,
             RegState::Define | RegState::Dead | RegState::EarlyClobber);

  return BB;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Opc = MI.getOpcode();
  bool isSelectRROp = (Opc == BPF::Select ||
                       Opc == BPF::Select_64_32 ||
                       Opc == BPF::Select_32 ||
                       Opc == BPF::Select_32_64);

  bool isMemcpyOp = Opc == BPF::MEMCPY;

#ifndef NDEBUG
  bool isSelectRIOp = (Opc == BPF::Select_Ri ||
                       Opc == BPF::Select_Ri_64_32 ||
                       Opc == BPF::Select_Ri_32 ||
                       Opc == BPF::Select_Ri_32_64);

  if (!(isSelectRROp || isSelectRIOp || isMemcpyOp))
    report_fatal_error("unhandled instruction type: " + Twine(Opc));
#endif

  if (isMemcpyOp)
    return EmitInstrWithCustomInserterMemcpy(MI, BB);

  bool is32BitCmp = (Opc == BPF::Select_32 ||
                     Opc == BPF::Select_32_64 ||
                     Opc == BPF::Select_Ri_32 ||
                     Opc == BPF::Select_Ri_32_64);

  // To "insert" a SELECT instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator I = ++BB->getIterator();

  // ThisMBB:
  //  ...
  //   TrueVal = ...
  //   jmp_XX r1, r2 goto Copy1MBB
  //   fallthrough --> Copy0MBB
  MachineBasicBlock *ThisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, Copy0MBB);
  F->insert(I, Copy1MBB);
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  Copy1MBB->splice(Copy1MBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
  Copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
  // Next, add the true and fallthrough blocks as its successors.
  BB->addSuccessor(Copy0MBB);
  BB->addSuccessor(Copy1MBB);

  // Insert the branch on the condition.
  int CC = MI.getOperand(3).getImm();
  int NewCC;
  switch (CC) {
#define SET_NEWCC(X, Y)                                                        \
  case ISD::X:                                                                 \
    if (is32BitCmp && HasJmp32)                                                \
      NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32;                  \
    else                                                                       \
      NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri;                        \
    break
  SET_NEWCC(SETGT, JSGT);
  SET_NEWCC(SETUGT, JUGT);
  SET_NEWCC(SETGE, JSGE);
  SET_NEWCC(SETUGE, JUGE);
  SET_NEWCC(SETEQ, JEQ);
  SET_NEWCC(SETNE, JNE);
  SET_NEWCC(SETLT, JSLT);
  SET_NEWCC(SETULT, JULT);
  SET_NEWCC(SETLE, JSLE);
  SET_NEWCC(SETULE, JULE);
  default:
    report_fatal_error("unimplemented select CondCode " + Twine(CC));
  }

  Register LHS = MI.getOperand(1).getReg();
  bool isSignedCmp = (CC == ISD::SETGT ||
                      CC == ISD::SETGE ||
                      CC == ISD::SETLT ||
                      CC == ISD::SETLE);

  // eBPF at the moment only has 64-bit comparisons. Any 32-bit comparison
  // needs to be promoted; however, if the 32-bit comparison operands are
  // destination registers then they are already implicitly zero-extended, so
  // there is no need for an explicit zero-extension sequence for them.
  //
  // We simply do the extension for all situations in this method, but we will
  // try to remove the unnecessary ones in the BPFMIPeephole pass.
  if (is32BitCmp && !HasJmp32)
    LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);

  if (isSelectRROp) {
    Register RHS = MI.getOperand(2).getReg();

    if (is32BitCmp && !HasJmp32)
      RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);

    BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
  } else {
    int64_t imm32 = MI.getOperand(2).getImm();
    // Check before we build the J*_ri instruction.
    if (!isInt<32>(imm32))
      report_fatal_error("immediate overflows 32 bits: " + Twine(imm32));
    BuildMI(BB, DL, TII.get(NewCC))
        .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
  }

  // Copy0MBB:
  //  %FalseValue = ...
  //  # fallthrough to Copy1MBB
  BB = Copy0MBB;

  // Update machine-CFG edges.
  BB->addSuccessor(Copy1MBB);

  // Copy1MBB:
  //  %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
  //  ...
  BB = Copy1MBB;
  BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
      .addReg(MI.getOperand(5).getReg())
      .addMBB(Copy0MBB)
      .addReg(MI.getOperand(4).getReg())
      .addMBB(ThisMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                          EVT VT) const {
  return getHasAlu32() ? MVT::i32 : MVT::i64;
}

MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                              EVT VT) const {
  return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
}

bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}