//===------ BPFAbstractMemberAccess.cpp - Abstracting Member Accesses -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass abstracts struct/union member accesses in order to support
// compile-once run-everywhere (CO-RE). CO-RE intends to compile a program
// once so that it can run on different kernels. In particular, if a bpf
// program tries to access a particular kernel data structure member, the
// details of the intermediate member accesses are remembered so the bpf
// loader can make the necessary adjustments right before program loading.
//
// For example,
//
//   struct s {
//     int a;
//     int b;
//   };
//   struct t {
//     struct s c;
//     int d;
//   };
//   struct t e;
//
// For the member access e.c.b, the compiler will generate code
//   &e + 4
//
// The compile-once run-everywhere instead generates the following code
//   r = 4
//   &e + r
// The "4" in "r = 4" can be changed based on a particular kernel version.
// For example, on a particular kernel version, if struct s is changed to
//
//   struct s {
//     int new_field;
//     int a;
//     int b;
//   }
//
// By repeating the member access on the host, the bpf loader can
// adjust "r = 4" as "r = 8".
45 // 46 // This feature relies on the following three intrinsic calls: 47 // addr = preserve_array_access_index(base, dimension, index) 48 // addr = preserve_union_access_index(base, di_index) 49 // !llvm.preserve.access.index <union_ditype> 50 // addr = preserve_struct_access_index(base, gep_index, di_index) 51 // !llvm.preserve.access.index <struct_ditype> 52 // 53 //===----------------------------------------------------------------------===// 54 55 #include "BPF.h" 56 #include "BPFCORE.h" 57 #include "BPFTargetMachine.h" 58 #include "llvm/IR/DebugInfoMetadata.h" 59 #include "llvm/IR/GlobalVariable.h" 60 #include "llvm/IR/Instruction.h" 61 #include "llvm/IR/Instructions.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Type.h" 64 #include "llvm/IR/User.h" 65 #include "llvm/IR/Value.h" 66 #include "llvm/Pass.h" 67 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 68 69 #define DEBUG_TYPE "bpf-abstract-member-access" 70 71 namespace llvm { 72 const std::string BPFCoreSharedInfo::AmaAttr = "btf_ama"; 73 const std::string BPFCoreSharedInfo::PatchableExtSecName = 74 ".BPF.patchable_externs"; 75 } // namespace llvm 76 77 using namespace llvm; 78 79 namespace { 80 81 class BPFAbstractMemberAccess final : public ModulePass { 82 StringRef getPassName() const override { 83 return "BPF Abstract Member Access"; 84 } 85 86 bool runOnModule(Module &M) override; 87 88 public: 89 static char ID; 90 BPFAbstractMemberAccess() : ModulePass(ID) {} 91 92 private: 93 enum : uint32_t { 94 BPFPreserveArrayAI = 1, 95 BPFPreserveUnionAI = 2, 96 BPFPreserveStructAI = 3, 97 }; 98 99 std::map<std::string, GlobalVariable *> GEPGlobals; 100 // A map to link preserve_*_access_index instrinsic calls. 101 std::map<CallInst *, std::pair<CallInst *, uint32_t>> AIChain; 102 // A map to hold all the base preserve_*_access_index instrinsic calls. 103 // The base call is not an input of any other preserve_*_access_index 104 // intrinsics. 
105 std::map<CallInst *, uint32_t> BaseAICalls; 106 107 bool doTransformation(Module &M); 108 109 void traceAICall(CallInst *Call, uint32_t Kind); 110 void traceBitCast(BitCastInst *BitCast, CallInst *Parent, uint32_t Kind); 111 void traceGEP(GetElementPtrInst *GEP, CallInst *Parent, uint32_t Kind); 112 void collectAICallChains(Module &M, Function &F); 113 114 bool IsPreserveDIAccessIndexCall(const CallInst *Call, uint32_t &Kind); 115 bool removePreserveAccessIndexIntrinsic(Module &M); 116 void replaceWithGEP(std::vector<CallInst *> &CallList, 117 uint32_t NumOfZerosIndex, uint32_t DIIndex); 118 119 Value *computeBaseAndAccessKey(CallInst *Call, std::string &AccessKey, 120 uint32_t Kind, MDNode *&TypeMeta); 121 bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex); 122 bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind); 123 }; 124 } // End anonymous namespace 125 126 char BPFAbstractMemberAccess::ID = 0; 127 INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE, 128 "abstracting struct/union member accessees", false, false) 129 130 ModulePass *llvm::createBPFAbstractMemberAccess() { 131 return new BPFAbstractMemberAccess(); 132 } 133 134 bool BPFAbstractMemberAccess::runOnModule(Module &M) { 135 LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n"); 136 137 // Bail out if no debug info. 138 if (empty(M.debug_compile_units())) 139 return false; 140 141 return doTransformation(M); 142 } 143 144 /// Check whether a call is a preserve_*_access_index intrinsic call or not. 
145 bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, 146 uint32_t &Kind) { 147 if (!Call) 148 return false; 149 150 const auto *GV = dyn_cast<GlobalValue>(Call->getCalledValue()); 151 if (!GV) 152 return false; 153 if (GV->getName().startswith("llvm.preserve.array.access.index")) { 154 Kind = BPFPreserveArrayAI; 155 return true; 156 } 157 if (GV->getName().startswith("llvm.preserve.union.access.index")) { 158 Kind = BPFPreserveUnionAI; 159 return true; 160 } 161 if (GV->getName().startswith("llvm.preserve.struct.access.index")) { 162 Kind = BPFPreserveStructAI; 163 return true; 164 } 165 166 return false; 167 } 168 169 void BPFAbstractMemberAccess::replaceWithGEP(std::vector<CallInst *> &CallList, 170 uint32_t DimensionIndex, 171 uint32_t GEPIndex) { 172 for (auto Call : CallList) { 173 uint32_t Dimension = 1; 174 if (DimensionIndex > 0) 175 Dimension = cast<ConstantInt>(Call->getArgOperand(DimensionIndex)) 176 ->getZExtValue(); 177 178 Constant *Zero = 179 ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0); 180 SmallVector<Value *, 4> IdxList; 181 for (unsigned I = 0; I < Dimension; ++I) 182 IdxList.push_back(Zero); 183 IdxList.push_back(Call->getArgOperand(GEPIndex)); 184 185 auto *GEP = GetElementPtrInst::CreateInBounds(Call->getArgOperand(0), 186 IdxList, "", Call); 187 Call->replaceAllUsesWith(GEP); 188 Call->eraseFromParent(); 189 } 190 } 191 192 bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) { 193 std::vector<CallInst *> PreserveArrayIndexCalls; 194 std::vector<CallInst *> PreserveUnionIndexCalls; 195 std::vector<CallInst *> PreserveStructIndexCalls; 196 bool Found = false; 197 198 for (Function &F : M) 199 for (auto &BB : F) 200 for (auto &I : BB) { 201 auto *Call = dyn_cast<CallInst>(&I); 202 uint32_t Kind; 203 if (!IsPreserveDIAccessIndexCall(Call, Kind)) 204 continue; 205 206 Found = true; 207 if (Kind == BPFPreserveArrayAI) 208 PreserveArrayIndexCalls.push_back(Call); 209 
else if (Kind == BPFPreserveUnionAI) 210 PreserveUnionIndexCalls.push_back(Call); 211 else 212 PreserveStructIndexCalls.push_back(Call); 213 } 214 215 // do the following transformation: 216 // . addr = preserve_array_access_index(base, dimension, index) 217 // is transformed to 218 // addr = GEP(base, dimenion's zero's, index) 219 // . addr = preserve_union_access_index(base, di_index) 220 // is transformed to 221 // addr = base, i.e., all usages of "addr" are replaced by "base". 222 // . addr = preserve_struct_access_index(base, gep_index, di_index) 223 // is transformed to 224 // addr = GEP(base, 0, gep_index) 225 replaceWithGEP(PreserveArrayIndexCalls, 1, 2); 226 replaceWithGEP(PreserveStructIndexCalls, 0, 1); 227 for (auto Call : PreserveUnionIndexCalls) { 228 Call->replaceAllUsesWith(Call->getArgOperand(0)); 229 Call->eraseFromParent(); 230 } 231 232 return Found; 233 } 234 235 void BPFAbstractMemberAccess::traceAICall(CallInst *Call, uint32_t Kind) { 236 for (User *U : Call->users()) { 237 Instruction *Inst = dyn_cast<Instruction>(U); 238 if (!Inst) 239 continue; 240 241 if (auto *BI = dyn_cast<BitCastInst>(Inst)) { 242 traceBitCast(BI, Call, Kind); 243 } else if (auto *CI = dyn_cast<CallInst>(Inst)) { 244 uint32_t CIKind; 245 if (IsPreserveDIAccessIndexCall(CI, CIKind)) { 246 AIChain[CI] = std::make_pair(Call, Kind); 247 traceAICall(CI, CIKind); 248 } else { 249 BaseAICalls[Call] = Kind; 250 } 251 } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) { 252 if (GI->hasAllZeroIndices()) 253 traceGEP(GI, Call, Kind); 254 else 255 BaseAICalls[Call] = Kind; 256 } 257 } 258 } 259 260 void BPFAbstractMemberAccess::traceBitCast(BitCastInst *BitCast, 261 CallInst *Parent, uint32_t Kind) { 262 for (User *U : BitCast->users()) { 263 Instruction *Inst = dyn_cast<Instruction>(U); 264 if (!Inst) 265 continue; 266 267 if (auto *BI = dyn_cast<BitCastInst>(Inst)) { 268 traceBitCast(BI, Parent, Kind); 269 } else if (auto *CI = dyn_cast<CallInst>(Inst)) { 270 uint32_t 
CIKind; 271 if (IsPreserveDIAccessIndexCall(CI, CIKind)) { 272 AIChain[CI] = std::make_pair(Parent, Kind); 273 traceAICall(CI, CIKind); 274 } else { 275 BaseAICalls[Parent] = Kind; 276 } 277 } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) { 278 if (GI->hasAllZeroIndices()) 279 traceGEP(GI, Parent, Kind); 280 else 281 BaseAICalls[Parent] = Kind; 282 } 283 } 284 } 285 286 void BPFAbstractMemberAccess::traceGEP(GetElementPtrInst *GEP, CallInst *Parent, 287 uint32_t Kind) { 288 for (User *U : GEP->users()) { 289 Instruction *Inst = dyn_cast<Instruction>(U); 290 if (!Inst) 291 continue; 292 293 if (auto *BI = dyn_cast<BitCastInst>(Inst)) { 294 traceBitCast(BI, Parent, Kind); 295 } else if (auto *CI = dyn_cast<CallInst>(Inst)) { 296 uint32_t CIKind; 297 if (IsPreserveDIAccessIndexCall(CI, CIKind)) { 298 AIChain[CI] = std::make_pair(Parent, Kind); 299 traceAICall(CI, CIKind); 300 } else { 301 BaseAICalls[Parent] = Kind; 302 } 303 } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) { 304 if (GI->hasAllZeroIndices()) 305 traceGEP(GI, Parent, Kind); 306 else 307 BaseAICalls[Parent] = Kind; 308 } 309 } 310 } 311 312 void BPFAbstractMemberAccess::collectAICallChains(Module &M, Function &F) { 313 AIChain.clear(); 314 BaseAICalls.clear(); 315 316 for (auto &BB : F) 317 for (auto &I : BB) { 318 uint32_t Kind; 319 auto *Call = dyn_cast<CallInst>(&I); 320 if (!IsPreserveDIAccessIndexCall(Call, Kind) || 321 AIChain.find(Call) != AIChain.end()) 322 continue; 323 324 traceAICall(Call, Kind); 325 } 326 } 327 328 /// Get access index from the preserve_*_access_index intrinsic calls. 
329 bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue, 330 uint64_t &AccessIndex) { 331 const ConstantInt *CV = dyn_cast<ConstantInt>(IndexValue); 332 if (!CV) 333 return false; 334 335 AccessIndex = CV->getValue().getZExtValue(); 336 return true; 337 } 338 339 /// Compute the base of the whole preserve_*_access_index chains, i.e., the base 340 /// pointer of the first preserve_*_access_index call, and construct the access 341 /// string, which will be the name of a global variable. 342 Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call, 343 std::string &AccessKey, 344 uint32_t Kind, 345 MDNode *&TypeMeta) { 346 Value *Base = nullptr; 347 std::vector<uint64_t> AccessIndices; 348 uint64_t TypeNameIndex = 0; 349 std::string LastTypeName; 350 351 while (Call) { 352 // Base of original corresponding GEP 353 Base = Call->getArgOperand(0); 354 355 // Type Name 356 std::string TypeName; 357 MDNode *MDN; 358 if (Kind == BPFPreserveUnionAI || Kind == BPFPreserveStructAI) { 359 MDN = Call->getMetadata(LLVMContext::MD_preserve_access_index); 360 if (!MDN) 361 return nullptr; 362 363 DIType *Ty = dyn_cast<DIType>(MDN); 364 if (!Ty) 365 return nullptr; 366 367 TypeName = Ty->getName(); 368 } 369 370 // Access Index 371 uint64_t AccessIndex; 372 uint32_t ArgIndex = (Kind == BPFPreserveUnionAI) ? 1 : 2; 373 if (!getAccessIndex(Call->getArgOperand(ArgIndex), AccessIndex)) 374 return nullptr; 375 376 AccessIndices.push_back(AccessIndex); 377 if (TypeName.size()) { 378 TypeNameIndex = AccessIndices.size() - 1; 379 LastTypeName = TypeName; 380 TypeMeta = MDN; 381 } 382 383 Kind = AIChain[Call].second; 384 Call = AIChain[Call].first; 385 } 386 387 // The intial type name is required. 388 // FIXME: if the initial type access is an array index, e.g., 389 // &a[3].b.c, only one dimentional array is supported. 390 if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2) 391 return nullptr; 392 393 // Construct the type string AccessKey. 
394 for (unsigned I = 0; I < AccessIndices.size(); ++I) 395 AccessKey = std::to_string(AccessIndices[I]) + ":" + AccessKey; 396 397 if (TypeNameIndex == AccessIndices.size() - 1) 398 AccessKey = "0:" + AccessKey; 399 400 // Access key is the type name + access string, uniquely identifying 401 // one kernel memory access. 402 AccessKey = LastTypeName + ":" + AccessKey; 403 404 return Base; 405 } 406 407 /// Call/Kind is the base preserve_*_access_index() call. Attempts to do 408 /// transformation to a chain of relocable GEPs. 409 bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call, 410 uint32_t Kind) { 411 std::string AccessKey; 412 MDNode *TypeMeta = nullptr; 413 Value *Base = 414 computeBaseAndAccessKey(Call, AccessKey, Kind, TypeMeta); 415 if (!Base) 416 return false; 417 418 // Do the transformation 419 // For any original GEP Call and Base %2 like 420 // %4 = bitcast %struct.net_device** %dev1 to i64* 421 // it is transformed to: 422 // %6 = load __BTF_0:sk_buff:0:0:2:0: 423 // %7 = bitcast %struct.sk_buff* %2 to i8* 424 // %8 = getelementptr i8, i8* %7, %6 425 // %9 = bitcast i8* %8 to i64* 426 // using %9 instead of %4 427 // The original Call inst is removed. 428 BasicBlock *BB = Call->getParent(); 429 GlobalVariable *GV; 430 431 if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) { 432 GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false, 433 GlobalVariable::ExternalLinkage, NULL, AccessKey); 434 GV->addAttribute(BPFCoreSharedInfo::AmaAttr); 435 // Set the metadata (debuginfo types) for the global. 436 if (TypeMeta) 437 GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta); 438 GEPGlobals[AccessKey] = GV; 439 } else { 440 GV = GEPGlobals[AccessKey]; 441 } 442 443 // Load the global variable. 
444 auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV); 445 BB->getInstList().insert(Call->getIterator(), LDInst); 446 447 // Generate a BitCast 448 auto *BCInst = new BitCastInst(Base, Type::getInt8PtrTy(BB->getContext())); 449 BB->getInstList().insert(Call->getIterator(), BCInst); 450 451 // Generate a GetElementPtr 452 auto *GEP = GetElementPtrInst::Create(Type::getInt8Ty(BB->getContext()), 453 BCInst, LDInst); 454 BB->getInstList().insert(Call->getIterator(), GEP); 455 456 // Generate a BitCast 457 auto *BCInst2 = new BitCastInst(GEP, Call->getType()); 458 BB->getInstList().insert(Call->getIterator(), BCInst2); 459 460 Call->replaceAllUsesWith(BCInst2); 461 Call->eraseFromParent(); 462 463 return true; 464 } 465 466 bool BPFAbstractMemberAccess::doTransformation(Module &M) { 467 bool Transformed = false; 468 469 for (Function &F : M) { 470 // Collect PreserveDIAccessIndex Intrinsic call chains. 471 // The call chains will be used to generate the access 472 // patterns similar to GEP. 473 collectAICallChains(M, F); 474 475 for (auto &C : BaseAICalls) 476 Transformed = transformGEPChain(M, C.first, C.second) || Transformed; 477 } 478 479 return removePreserveAccessIndexIntrinsic(M) || Transformed; 480 } 481