xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1e8d8bef9SDimitry Andric //===------------ BPFCheckAndAdjustIR.cpp - Check and Adjust IR -----------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
9e8d8bef9SDimitry Andric // Check IR and adjust IR for verifier friendly codes.
10e8d8bef9SDimitry Andric // The following are done for IR checking:
11e8d8bef9SDimitry Andric //   - no relocation globals in PHI node.
12e8d8bef9SDimitry Andric // The following are done for IR adjustment:
13e8d8bef9SDimitry Andric //   - remove __builtin_bpf_passthrough builtins. Target independent IR
14e8d8bef9SDimitry Andric //     optimizations are done and those builtins can be removed.
155f757f3fSDimitry Andric //   - remove llvm.bpf.getelementptr.and.load builtins.
165f757f3fSDimitry Andric //   - remove llvm.bpf.getelementptr.and.store builtins.
17*0fca6ea1SDimitry Andric //   - for loads and stores with base addresses from non-zero address space
18*0fca6ea1SDimitry Andric //     cast base address to zero address space (support for BPF address spaces).
19e8d8bef9SDimitry Andric //
20e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
21e8d8bef9SDimitry Andric 
22e8d8bef9SDimitry Andric #include "BPF.h"
23e8d8bef9SDimitry Andric #include "BPFCORE.h"
24e8d8bef9SDimitry Andric #include "BPFTargetMachine.h"
2506c3fb27SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
26e8d8bef9SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
27e8d8bef9SDimitry Andric #include "llvm/IR/GlobalVariable.h"
2806c3fb27SDimitry Andric #include "llvm/IR/IRBuilder.h"
29e8d8bef9SDimitry Andric #include "llvm/IR/Instruction.h"
30e8d8bef9SDimitry Andric #include "llvm/IR/Instructions.h"
315f757f3fSDimitry Andric #include "llvm/IR/IntrinsicsBPF.h"
32e8d8bef9SDimitry Andric #include "llvm/IR/Module.h"
33e8d8bef9SDimitry Andric #include "llvm/IR/Type.h"
34e8d8bef9SDimitry Andric #include "llvm/IR/User.h"
35e8d8bef9SDimitry Andric #include "llvm/IR/Value.h"
36e8d8bef9SDimitry Andric #include "llvm/Pass.h"
37e8d8bef9SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
38e8d8bef9SDimitry Andric 
39e8d8bef9SDimitry Andric #define DEBUG_TYPE "bpf-check-and-opt-ir"
40e8d8bef9SDimitry Andric 
41e8d8bef9SDimitry Andric using namespace llvm;
42e8d8bef9SDimitry Andric 
43e8d8bef9SDimitry Andric namespace {
44e8d8bef9SDimitry Andric 
45e8d8bef9SDimitry Andric class BPFCheckAndAdjustIR final : public ModulePass {
46e8d8bef9SDimitry Andric   bool runOnModule(Module &F) override;
47e8d8bef9SDimitry Andric 
48e8d8bef9SDimitry Andric public:
49e8d8bef9SDimitry Andric   static char ID;
50e8d8bef9SDimitry Andric   BPFCheckAndAdjustIR() : ModulePass(ID) {}
5106c3fb27SDimitry Andric   virtual void getAnalysisUsage(AnalysisUsage &AU) const override;
52e8d8bef9SDimitry Andric 
53e8d8bef9SDimitry Andric private:
54e8d8bef9SDimitry Andric   void checkIR(Module &M);
55e8d8bef9SDimitry Andric   bool adjustIR(Module &M);
56e8d8bef9SDimitry Andric   bool removePassThroughBuiltin(Module &M);
57349cc55cSDimitry Andric   bool removeCompareBuiltin(Module &M);
5806c3fb27SDimitry Andric   bool sinkMinMax(Module &M);
595f757f3fSDimitry Andric   bool removeGEPBuiltins(Module &M);
60*0fca6ea1SDimitry Andric   bool insertASpaceCasts(Module &M);
61e8d8bef9SDimitry Andric };
62e8d8bef9SDimitry Andric } // End anonymous namespace
63e8d8bef9SDimitry Andric 
64e8d8bef9SDimitry Andric char BPFCheckAndAdjustIR::ID = 0;
65e8d8bef9SDimitry Andric INITIALIZE_PASS(BPFCheckAndAdjustIR, DEBUG_TYPE, "BPF Check And Adjust IR",
66e8d8bef9SDimitry Andric                 false, false)
67e8d8bef9SDimitry Andric 
68e8d8bef9SDimitry Andric ModulePass *llvm::createBPFCheckAndAdjustIR() {
69e8d8bef9SDimitry Andric   return new BPFCheckAndAdjustIR();
70e8d8bef9SDimitry Andric }
71e8d8bef9SDimitry Andric 
72e8d8bef9SDimitry Andric void BPFCheckAndAdjustIR::checkIR(Module &M) {
73e8d8bef9SDimitry Andric   // Ensure relocation global won't appear in PHI node
74e8d8bef9SDimitry Andric   // This may happen if the compiler generated the following code:
75e8d8bef9SDimitry Andric   //   B1:
76e8d8bef9SDimitry Andric   //      g1 = @llvm.skb_buff:0:1...
77e8d8bef9SDimitry Andric   //      ...
78e8d8bef9SDimitry Andric   //      goto B_COMMON
79e8d8bef9SDimitry Andric   //   B2:
80e8d8bef9SDimitry Andric   //      g2 = @llvm.skb_buff:0:2...
81e8d8bef9SDimitry Andric   //      ...
82e8d8bef9SDimitry Andric   //      goto B_COMMON
83e8d8bef9SDimitry Andric   //   B_COMMON:
84e8d8bef9SDimitry Andric   //      g = PHI(g1, g2)
85e8d8bef9SDimitry Andric   //      x = load g
86e8d8bef9SDimitry Andric   //      ...
87e8d8bef9SDimitry Andric   // If anything likes the above "g = PHI(g1, g2)", issue a fatal error.
88e8d8bef9SDimitry Andric   for (Function &F : M)
89e8d8bef9SDimitry Andric     for (auto &BB : F)
90e8d8bef9SDimitry Andric       for (auto &I : BB) {
91e8d8bef9SDimitry Andric         PHINode *PN = dyn_cast<PHINode>(&I);
92e8d8bef9SDimitry Andric         if (!PN || PN->use_empty())
93e8d8bef9SDimitry Andric           continue;
94e8d8bef9SDimitry Andric         for (int i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
95e8d8bef9SDimitry Andric           auto *GV = dyn_cast<GlobalVariable>(PN->getIncomingValue(i));
96e8d8bef9SDimitry Andric           if (!GV)
97e8d8bef9SDimitry Andric             continue;
98e8d8bef9SDimitry Andric           if (GV->hasAttribute(BPFCoreSharedInfo::AmaAttr) ||
99e8d8bef9SDimitry Andric               GV->hasAttribute(BPFCoreSharedInfo::TypeIdAttr))
100e8d8bef9SDimitry Andric             report_fatal_error("relocation global in PHI node");
101e8d8bef9SDimitry Andric         }
102e8d8bef9SDimitry Andric       }
103e8d8bef9SDimitry Andric }
104e8d8bef9SDimitry Andric 
105e8d8bef9SDimitry Andric bool BPFCheckAndAdjustIR::removePassThroughBuiltin(Module &M) {
106e8d8bef9SDimitry Andric   // Remove __builtin_bpf_passthrough()'s which are used to prevent
107e8d8bef9SDimitry Andric   // certain IR optimizations. Now major IR optimizations are done,
108e8d8bef9SDimitry Andric   // remove them.
109e8d8bef9SDimitry Andric   bool Changed = false;
110e8d8bef9SDimitry Andric   CallInst *ToBeDeleted = nullptr;
111e8d8bef9SDimitry Andric   for (Function &F : M)
112e8d8bef9SDimitry Andric     for (auto &BB : F)
113e8d8bef9SDimitry Andric       for (auto &I : BB) {
114e8d8bef9SDimitry Andric         if (ToBeDeleted) {
115e8d8bef9SDimitry Andric           ToBeDeleted->eraseFromParent();
116e8d8bef9SDimitry Andric           ToBeDeleted = nullptr;
117e8d8bef9SDimitry Andric         }
118e8d8bef9SDimitry Andric 
119e8d8bef9SDimitry Andric         auto *Call = dyn_cast<CallInst>(&I);
120e8d8bef9SDimitry Andric         if (!Call)
121e8d8bef9SDimitry Andric           continue;
122e8d8bef9SDimitry Andric         auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
123e8d8bef9SDimitry Andric         if (!GV)
124e8d8bef9SDimitry Andric           continue;
1255f757f3fSDimitry Andric         if (!GV->getName().starts_with("llvm.bpf.passthrough"))
126e8d8bef9SDimitry Andric           continue;
127e8d8bef9SDimitry Andric         Changed = true;
128e8d8bef9SDimitry Andric         Value *Arg = Call->getArgOperand(1);
129e8d8bef9SDimitry Andric         Call->replaceAllUsesWith(Arg);
130e8d8bef9SDimitry Andric         ToBeDeleted = Call;
131e8d8bef9SDimitry Andric       }
132e8d8bef9SDimitry Andric   return Changed;
133e8d8bef9SDimitry Andric }
134e8d8bef9SDimitry Andric 
135349cc55cSDimitry Andric bool BPFCheckAndAdjustIR::removeCompareBuiltin(Module &M) {
136349cc55cSDimitry Andric   // Remove __builtin_bpf_compare()'s which are used to prevent
137349cc55cSDimitry Andric   // certain IR optimizations. Now major IR optimizations are done,
138349cc55cSDimitry Andric   // remove them.
139349cc55cSDimitry Andric   bool Changed = false;
140349cc55cSDimitry Andric   CallInst *ToBeDeleted = nullptr;
141349cc55cSDimitry Andric   for (Function &F : M)
142349cc55cSDimitry Andric     for (auto &BB : F)
143349cc55cSDimitry Andric       for (auto &I : BB) {
144349cc55cSDimitry Andric         if (ToBeDeleted) {
145349cc55cSDimitry Andric           ToBeDeleted->eraseFromParent();
146349cc55cSDimitry Andric           ToBeDeleted = nullptr;
147349cc55cSDimitry Andric         }
148349cc55cSDimitry Andric 
149349cc55cSDimitry Andric         auto *Call = dyn_cast<CallInst>(&I);
150349cc55cSDimitry Andric         if (!Call)
151349cc55cSDimitry Andric           continue;
152349cc55cSDimitry Andric         auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
153349cc55cSDimitry Andric         if (!GV)
154349cc55cSDimitry Andric           continue;
1555f757f3fSDimitry Andric         if (!GV->getName().starts_with("llvm.bpf.compare"))
156349cc55cSDimitry Andric           continue;
157349cc55cSDimitry Andric 
158349cc55cSDimitry Andric         Changed = true;
159349cc55cSDimitry Andric         Value *Arg0 = Call->getArgOperand(0);
160349cc55cSDimitry Andric         Value *Arg1 = Call->getArgOperand(1);
161349cc55cSDimitry Andric         Value *Arg2 = Call->getArgOperand(2);
162349cc55cSDimitry Andric 
163349cc55cSDimitry Andric         auto OpVal = cast<ConstantInt>(Arg0)->getValue().getZExtValue();
164349cc55cSDimitry Andric         CmpInst::Predicate Opcode = (CmpInst::Predicate)OpVal;
165349cc55cSDimitry Andric 
166349cc55cSDimitry Andric         auto *ICmp = new ICmpInst(Opcode, Arg1, Arg2);
167bdd1243dSDimitry Andric         ICmp->insertBefore(Call);
168349cc55cSDimitry Andric 
169349cc55cSDimitry Andric         Call->replaceAllUsesWith(ICmp);
170349cc55cSDimitry Andric         ToBeDeleted = Call;
171349cc55cSDimitry Andric       }
172349cc55cSDimitry Andric   return Changed;
173349cc55cSDimitry Andric }
174349cc55cSDimitry Andric 
17506c3fb27SDimitry Andric struct MinMaxSinkInfo {
17606c3fb27SDimitry Andric   ICmpInst *ICmp;
17706c3fb27SDimitry Andric   Value *Other;
17806c3fb27SDimitry Andric   ICmpInst::Predicate Predicate;
17906c3fb27SDimitry Andric   CallInst *MinMax;
18006c3fb27SDimitry Andric   ZExtInst *ZExt;
18106c3fb27SDimitry Andric   SExtInst *SExt;
18206c3fb27SDimitry Andric 
18306c3fb27SDimitry Andric   MinMaxSinkInfo(ICmpInst *ICmp, Value *Other, ICmpInst::Predicate Predicate)
18406c3fb27SDimitry Andric       : ICmp(ICmp), Other(Other), Predicate(Predicate), MinMax(nullptr),
18506c3fb27SDimitry Andric         ZExt(nullptr), SExt(nullptr) {}
18606c3fb27SDimitry Andric };
18706c3fb27SDimitry Andric 
18806c3fb27SDimitry Andric static bool sinkMinMaxInBB(BasicBlock &BB,
18906c3fb27SDimitry Andric                            const std::function<bool(Instruction *)> &Filter) {
19006c3fb27SDimitry Andric   // Check if V is:
19106c3fb27SDimitry Andric   //   (fn %a %b) or (ext (fn %a %b))
19206c3fb27SDimitry Andric   // Where:
19306c3fb27SDimitry Andric   //   ext := sext | zext
19406c3fb27SDimitry Andric   //   fn  := smin | umin | smax | umax
19506c3fb27SDimitry Andric   auto IsMinMaxCall = [=](Value *V, MinMaxSinkInfo &Info) {
19606c3fb27SDimitry Andric     if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
19706c3fb27SDimitry Andric       V = ZExt->getOperand(0);
19806c3fb27SDimitry Andric       Info.ZExt = ZExt;
19906c3fb27SDimitry Andric     } else if (auto *SExt = dyn_cast<SExtInst>(V)) {
20006c3fb27SDimitry Andric       V = SExt->getOperand(0);
20106c3fb27SDimitry Andric       Info.SExt = SExt;
20206c3fb27SDimitry Andric     }
20306c3fb27SDimitry Andric 
20406c3fb27SDimitry Andric     auto *Call = dyn_cast<CallInst>(V);
20506c3fb27SDimitry Andric     if (!Call)
20606c3fb27SDimitry Andric       return false;
20706c3fb27SDimitry Andric 
20806c3fb27SDimitry Andric     auto *Called = dyn_cast<Function>(Call->getCalledOperand());
20906c3fb27SDimitry Andric     if (!Called)
21006c3fb27SDimitry Andric       return false;
21106c3fb27SDimitry Andric 
21206c3fb27SDimitry Andric     switch (Called->getIntrinsicID()) {
21306c3fb27SDimitry Andric     case Intrinsic::smin:
21406c3fb27SDimitry Andric     case Intrinsic::umin:
21506c3fb27SDimitry Andric     case Intrinsic::smax:
21606c3fb27SDimitry Andric     case Intrinsic::umax:
21706c3fb27SDimitry Andric       break;
21806c3fb27SDimitry Andric     default:
21906c3fb27SDimitry Andric       return false;
22006c3fb27SDimitry Andric     }
22106c3fb27SDimitry Andric 
22206c3fb27SDimitry Andric     if (!Filter(Call))
22306c3fb27SDimitry Andric       return false;
22406c3fb27SDimitry Andric 
22506c3fb27SDimitry Andric     Info.MinMax = Call;
22606c3fb27SDimitry Andric 
22706c3fb27SDimitry Andric     return true;
22806c3fb27SDimitry Andric   };
22906c3fb27SDimitry Andric 
23006c3fb27SDimitry Andric   auto ZeroOrSignExtend = [](IRBuilder<> &Builder, Value *V,
23106c3fb27SDimitry Andric                              MinMaxSinkInfo &Info) {
23206c3fb27SDimitry Andric     if (Info.SExt) {
23306c3fb27SDimitry Andric       if (Info.SExt->getType() == V->getType())
23406c3fb27SDimitry Andric         return V;
23506c3fb27SDimitry Andric       return Builder.CreateSExt(V, Info.SExt->getType());
23606c3fb27SDimitry Andric     }
23706c3fb27SDimitry Andric     if (Info.ZExt) {
23806c3fb27SDimitry Andric       if (Info.ZExt->getType() == V->getType())
23906c3fb27SDimitry Andric         return V;
24006c3fb27SDimitry Andric       return Builder.CreateZExt(V, Info.ZExt->getType());
24106c3fb27SDimitry Andric     }
24206c3fb27SDimitry Andric     return V;
24306c3fb27SDimitry Andric   };
24406c3fb27SDimitry Andric 
24506c3fb27SDimitry Andric   bool Changed = false;
24606c3fb27SDimitry Andric   SmallVector<MinMaxSinkInfo, 2> SinkList;
24706c3fb27SDimitry Andric 
24806c3fb27SDimitry Andric   // Check BB for instructions like:
24906c3fb27SDimitry Andric   //   insn := (icmp %a (fn ...)) | (icmp (fn ...)  %a)
25006c3fb27SDimitry Andric   //
25106c3fb27SDimitry Andric   // Where:
25206c3fb27SDimitry Andric   //   fn := min | max | (sext (min ...)) | (sext (max ...))
25306c3fb27SDimitry Andric   //
25406c3fb27SDimitry Andric   // Put such instructions to SinkList.
25506c3fb27SDimitry Andric   for (Instruction &I : BB) {
25606c3fb27SDimitry Andric     ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
25706c3fb27SDimitry Andric     if (!ICmp)
25806c3fb27SDimitry Andric       continue;
25906c3fb27SDimitry Andric     if (!ICmp->isRelational())
26006c3fb27SDimitry Andric       continue;
26106c3fb27SDimitry Andric     MinMaxSinkInfo First(ICmp, ICmp->getOperand(1),
26206c3fb27SDimitry Andric                          ICmpInst::getSwappedPredicate(ICmp->getPredicate()));
26306c3fb27SDimitry Andric     MinMaxSinkInfo Second(ICmp, ICmp->getOperand(0), ICmp->getPredicate());
26406c3fb27SDimitry Andric     bool FirstMinMax = IsMinMaxCall(ICmp->getOperand(0), First);
26506c3fb27SDimitry Andric     bool SecondMinMax = IsMinMaxCall(ICmp->getOperand(1), Second);
26606c3fb27SDimitry Andric     if (!(FirstMinMax ^ SecondMinMax))
26706c3fb27SDimitry Andric       continue;
26806c3fb27SDimitry Andric     SinkList.push_back(FirstMinMax ? First : Second);
26906c3fb27SDimitry Andric   }
27006c3fb27SDimitry Andric 
27106c3fb27SDimitry Andric   // Iterate SinkList and replace each (icmp ...) with corresponding
27206c3fb27SDimitry Andric   // `x < a && x < b` or similar expression.
27306c3fb27SDimitry Andric   for (auto &Info : SinkList) {
27406c3fb27SDimitry Andric     ICmpInst *ICmp = Info.ICmp;
27506c3fb27SDimitry Andric     CallInst *MinMax = Info.MinMax;
27606c3fb27SDimitry Andric     Intrinsic::ID IID = MinMax->getCalledFunction()->getIntrinsicID();
27706c3fb27SDimitry Andric     ICmpInst::Predicate P = Info.Predicate;
27806c3fb27SDimitry Andric     if (ICmpInst::isSigned(P) && IID != Intrinsic::smin &&
27906c3fb27SDimitry Andric         IID != Intrinsic::smax)
28006c3fb27SDimitry Andric       continue;
28106c3fb27SDimitry Andric 
28206c3fb27SDimitry Andric     IRBuilder<> Builder(ICmp);
28306c3fb27SDimitry Andric     Value *X = Info.Other;
28406c3fb27SDimitry Andric     Value *A = ZeroOrSignExtend(Builder, MinMax->getArgOperand(0), Info);
28506c3fb27SDimitry Andric     Value *B = ZeroOrSignExtend(Builder, MinMax->getArgOperand(1), Info);
28606c3fb27SDimitry Andric     bool IsMin = IID == Intrinsic::smin || IID == Intrinsic::umin;
28706c3fb27SDimitry Andric     bool IsMax = IID == Intrinsic::smax || IID == Intrinsic::umax;
28806c3fb27SDimitry Andric     bool IsLess = ICmpInst::isLE(P) || ICmpInst::isLT(P);
28906c3fb27SDimitry Andric     bool IsGreater = ICmpInst::isGE(P) || ICmpInst::isGT(P);
29006c3fb27SDimitry Andric     assert(IsMin ^ IsMax);
29106c3fb27SDimitry Andric     assert(IsLess ^ IsGreater);
29206c3fb27SDimitry Andric 
29306c3fb27SDimitry Andric     Value *Replacement;
29406c3fb27SDimitry Andric     Value *LHS = Builder.CreateICmp(P, X, A);
29506c3fb27SDimitry Andric     Value *RHS = Builder.CreateICmp(P, X, B);
29606c3fb27SDimitry Andric     if ((IsLess && IsMin) || (IsGreater && IsMax))
29706c3fb27SDimitry Andric       // x < min(a, b) -> x < a && x < b
29806c3fb27SDimitry Andric       // x > max(a, b) -> x > a && x > b
29906c3fb27SDimitry Andric       Replacement = Builder.CreateLogicalAnd(LHS, RHS);
30006c3fb27SDimitry Andric     else
30106c3fb27SDimitry Andric       // x > min(a, b) -> x > a || x > b
30206c3fb27SDimitry Andric       // x < max(a, b) -> x < a || x < b
30306c3fb27SDimitry Andric       Replacement = Builder.CreateLogicalOr(LHS, RHS);
30406c3fb27SDimitry Andric 
30506c3fb27SDimitry Andric     ICmp->replaceAllUsesWith(Replacement);
30606c3fb27SDimitry Andric 
30706c3fb27SDimitry Andric     Instruction *ToRemove[] = {ICmp, Info.ZExt, Info.SExt, MinMax};
30806c3fb27SDimitry Andric     for (Instruction *I : ToRemove)
30906c3fb27SDimitry Andric       if (I && I->use_empty())
31006c3fb27SDimitry Andric         I->eraseFromParent();
31106c3fb27SDimitry Andric 
31206c3fb27SDimitry Andric     Changed = true;
31306c3fb27SDimitry Andric   }
31406c3fb27SDimitry Andric 
31506c3fb27SDimitry Andric   return Changed;
31606c3fb27SDimitry Andric }
31706c3fb27SDimitry Andric 
31806c3fb27SDimitry Andric // Do the following transformation:
31906c3fb27SDimitry Andric //
32006c3fb27SDimitry Andric //   x < min(a, b) -> x < a && x < b
32106c3fb27SDimitry Andric //   x > min(a, b) -> x > a || x > b
32206c3fb27SDimitry Andric //   x < max(a, b) -> x < a || x < b
32306c3fb27SDimitry Andric //   x > max(a, b) -> x > a && x > b
32406c3fb27SDimitry Andric //
32506c3fb27SDimitry Andric // Such patterns are introduced by LICM.cpp:hoistMinMax()
32606c3fb27SDimitry Andric // transformation and might lead to BPF verification failures for
32706c3fb27SDimitry Andric // older kernels.
32806c3fb27SDimitry Andric //
32906c3fb27SDimitry Andric // To minimize "collateral" changes only do it for icmp + min/max
33006c3fb27SDimitry Andric // calls when icmp is inside a loop and min/max is outside of that
33106c3fb27SDimitry Andric // loop.
33206c3fb27SDimitry Andric //
33306c3fb27SDimitry Andric // Verification failure happens when:
33406c3fb27SDimitry Andric // - RHS operand of some `icmp LHS, RHS` is replaced by some RHS1;
33506c3fb27SDimitry Andric // - verifier can recognize RHS as a constant scalar in some context;
33606c3fb27SDimitry Andric // - verifier can't recognize RHS1 as a constant scalar in the same
33706c3fb27SDimitry Andric //   context;
33806c3fb27SDimitry Andric //
33906c3fb27SDimitry Andric // The "constant scalar" is not a compile time constant, but a register
34006c3fb27SDimitry Andric // that holds a scalar value known to verifier at some point in time
34106c3fb27SDimitry Andric // during abstract interpretation.
34206c3fb27SDimitry Andric //
34306c3fb27SDimitry Andric // See also:
34406c3fb27SDimitry Andric //   https://lore.kernel.org/bpf/20230406164505.1046801-1-yhs@fb.com/
34506c3fb27SDimitry Andric bool BPFCheckAndAdjustIR::sinkMinMax(Module &M) {
34606c3fb27SDimitry Andric   bool Changed = false;
34706c3fb27SDimitry Andric 
34806c3fb27SDimitry Andric   for (Function &F : M) {
34906c3fb27SDimitry Andric     if (F.isDeclaration())
35006c3fb27SDimitry Andric       continue;
35106c3fb27SDimitry Andric 
35206c3fb27SDimitry Andric     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo();
35306c3fb27SDimitry Andric     for (Loop *L : LI)
35406c3fb27SDimitry Andric       for (BasicBlock *BB : L->blocks()) {
35506c3fb27SDimitry Andric         // Filter out instructions coming from the same loop
35606c3fb27SDimitry Andric         Loop *BBLoop = LI.getLoopFor(BB);
35706c3fb27SDimitry Andric         auto OtherLoopFilter = [&](Instruction *I) {
35806c3fb27SDimitry Andric           return LI.getLoopFor(I->getParent()) != BBLoop;
35906c3fb27SDimitry Andric         };
36006c3fb27SDimitry Andric         Changed |= sinkMinMaxInBB(*BB, OtherLoopFilter);
36106c3fb27SDimitry Andric       }
36206c3fb27SDimitry Andric   }
36306c3fb27SDimitry Andric 
36406c3fb27SDimitry Andric   return Changed;
36506c3fb27SDimitry Andric }
36606c3fb27SDimitry Andric 
36706c3fb27SDimitry Andric void BPFCheckAndAdjustIR::getAnalysisUsage(AnalysisUsage &AU) const {
36806c3fb27SDimitry Andric   AU.addRequired<LoopInfoWrapperPass>();
36906c3fb27SDimitry Andric }
37006c3fb27SDimitry Andric 
3715f757f3fSDimitry Andric static void unrollGEPLoad(CallInst *Call) {
3725f757f3fSDimitry Andric   auto [GEP, Load] = BPFPreserveStaticOffsetPass::reconstructLoad(Call);
3735f757f3fSDimitry Andric   GEP->insertBefore(Call);
3745f757f3fSDimitry Andric   Load->insertBefore(Call);
3755f757f3fSDimitry Andric   Call->replaceAllUsesWith(Load);
3765f757f3fSDimitry Andric   Call->eraseFromParent();
3775f757f3fSDimitry Andric }
3785f757f3fSDimitry Andric 
3795f757f3fSDimitry Andric static void unrollGEPStore(CallInst *Call) {
3805f757f3fSDimitry Andric   auto [GEP, Store] = BPFPreserveStaticOffsetPass::reconstructStore(Call);
3815f757f3fSDimitry Andric   GEP->insertBefore(Call);
3825f757f3fSDimitry Andric   Store->insertBefore(Call);
3835f757f3fSDimitry Andric   Call->eraseFromParent();
3845f757f3fSDimitry Andric }
3855f757f3fSDimitry Andric 
3865f757f3fSDimitry Andric static bool removeGEPBuiltinsInFunc(Function &F) {
3875f757f3fSDimitry Andric   SmallVector<CallInst *> GEPLoads;
3885f757f3fSDimitry Andric   SmallVector<CallInst *> GEPStores;
3895f757f3fSDimitry Andric   for (auto &BB : F)
3905f757f3fSDimitry Andric     for (auto &Insn : BB)
3915f757f3fSDimitry Andric       if (auto *Call = dyn_cast<CallInst>(&Insn))
3925f757f3fSDimitry Andric         if (auto *Called = Call->getCalledFunction())
3935f757f3fSDimitry Andric           switch (Called->getIntrinsicID()) {
3945f757f3fSDimitry Andric           case Intrinsic::bpf_getelementptr_and_load:
3955f757f3fSDimitry Andric             GEPLoads.push_back(Call);
3965f757f3fSDimitry Andric             break;
3975f757f3fSDimitry Andric           case Intrinsic::bpf_getelementptr_and_store:
3985f757f3fSDimitry Andric             GEPStores.push_back(Call);
3995f757f3fSDimitry Andric             break;
4005f757f3fSDimitry Andric           }
4015f757f3fSDimitry Andric 
4025f757f3fSDimitry Andric   if (GEPLoads.empty() && GEPStores.empty())
4035f757f3fSDimitry Andric     return false;
4045f757f3fSDimitry Andric 
4055f757f3fSDimitry Andric   for_each(GEPLoads, unrollGEPLoad);
4065f757f3fSDimitry Andric   for_each(GEPStores, unrollGEPStore);
4075f757f3fSDimitry Andric 
4085f757f3fSDimitry Andric   return true;
4095f757f3fSDimitry Andric }
4105f757f3fSDimitry Andric 
4115f757f3fSDimitry Andric // Rewrites the following builtins:
4125f757f3fSDimitry Andric // - llvm.bpf.getelementptr.and.load
4135f757f3fSDimitry Andric // - llvm.bpf.getelementptr.and.store
4145f757f3fSDimitry Andric // As (load (getelementptr ...)) or (store (getelementptr ...)).
4155f757f3fSDimitry Andric bool BPFCheckAndAdjustIR::removeGEPBuiltins(Module &M) {
4165f757f3fSDimitry Andric   bool Changed = false;
4175f757f3fSDimitry Andric   for (auto &F : M)
4185f757f3fSDimitry Andric     Changed = removeGEPBuiltinsInFunc(F) || Changed;
4195f757f3fSDimitry Andric   return Changed;
4205f757f3fSDimitry Andric }
4215f757f3fSDimitry Andric 
422*0fca6ea1SDimitry Andric // Wrap ToWrap with cast to address space zero:
423*0fca6ea1SDimitry Andric // - if ToWrap is a getelementptr,
424*0fca6ea1SDimitry Andric //   wrap it's base pointer instead and return a copy;
425*0fca6ea1SDimitry Andric // - if ToWrap is Instruction, insert address space cast
426*0fca6ea1SDimitry Andric //   immediately after ToWrap;
427*0fca6ea1SDimitry Andric // - if ToWrap is not an Instruction (function parameter
428*0fca6ea1SDimitry Andric //   or a global value), insert address space cast at the
429*0fca6ea1SDimitry Andric //   beginning of the Function F;
430*0fca6ea1SDimitry Andric // - use Cache to avoid inserting too many casts;
431*0fca6ea1SDimitry Andric static Value *aspaceWrapValue(DenseMap<Value *, Value *> &Cache, Function *F,
432*0fca6ea1SDimitry Andric                               Value *ToWrap) {
433*0fca6ea1SDimitry Andric   auto It = Cache.find(ToWrap);
434*0fca6ea1SDimitry Andric   if (It != Cache.end())
435*0fca6ea1SDimitry Andric     return It->getSecond();
436*0fca6ea1SDimitry Andric 
437*0fca6ea1SDimitry Andric   if (auto *GEP = dyn_cast<GetElementPtrInst>(ToWrap)) {
438*0fca6ea1SDimitry Andric     Value *Ptr = GEP->getPointerOperand();
439*0fca6ea1SDimitry Andric     Value *WrappedPtr = aspaceWrapValue(Cache, F, Ptr);
440*0fca6ea1SDimitry Andric     auto *GEPTy = cast<PointerType>(GEP->getType());
441*0fca6ea1SDimitry Andric     auto *NewGEP = GEP->clone();
442*0fca6ea1SDimitry Andric     NewGEP->insertAfter(GEP);
443*0fca6ea1SDimitry Andric     NewGEP->mutateType(GEPTy->getPointerTo(0));
444*0fca6ea1SDimitry Andric     NewGEP->setOperand(GEP->getPointerOperandIndex(), WrappedPtr);
445*0fca6ea1SDimitry Andric     NewGEP->setName(GEP->getName());
446*0fca6ea1SDimitry Andric     Cache[ToWrap] = NewGEP;
447*0fca6ea1SDimitry Andric     return NewGEP;
448*0fca6ea1SDimitry Andric   }
449*0fca6ea1SDimitry Andric 
450*0fca6ea1SDimitry Andric   IRBuilder IB(F->getContext());
451*0fca6ea1SDimitry Andric   if (Instruction *InsnPtr = dyn_cast<Instruction>(ToWrap))
452*0fca6ea1SDimitry Andric     IB.SetInsertPoint(*InsnPtr->getInsertionPointAfterDef());
453*0fca6ea1SDimitry Andric   else
454*0fca6ea1SDimitry Andric     IB.SetInsertPoint(F->getEntryBlock().getFirstInsertionPt());
455*0fca6ea1SDimitry Andric   auto *PtrTy = cast<PointerType>(ToWrap->getType());
456*0fca6ea1SDimitry Andric   auto *ASZeroPtrTy = PtrTy->getPointerTo(0);
457*0fca6ea1SDimitry Andric   auto *ACast = IB.CreateAddrSpaceCast(ToWrap, ASZeroPtrTy, ToWrap->getName());
458*0fca6ea1SDimitry Andric   Cache[ToWrap] = ACast;
459*0fca6ea1SDimitry Andric   return ACast;
460*0fca6ea1SDimitry Andric }
461*0fca6ea1SDimitry Andric 
462*0fca6ea1SDimitry Andric // Wrap a pointer operand OpNum of instruction I
463*0fca6ea1SDimitry Andric // with cast to address space zero
464*0fca6ea1SDimitry Andric static void aspaceWrapOperand(DenseMap<Value *, Value *> &Cache, Instruction *I,
465*0fca6ea1SDimitry Andric                               unsigned OpNum) {
466*0fca6ea1SDimitry Andric   Value *OldOp = I->getOperand(OpNum);
467*0fca6ea1SDimitry Andric   if (OldOp->getType()->getPointerAddressSpace() == 0)
468*0fca6ea1SDimitry Andric     return;
469*0fca6ea1SDimitry Andric 
470*0fca6ea1SDimitry Andric   Value *NewOp = aspaceWrapValue(Cache, I->getFunction(), OldOp);
471*0fca6ea1SDimitry Andric   I->setOperand(OpNum, NewOp);
472*0fca6ea1SDimitry Andric   // Check if there are any remaining users of old GEP,
473*0fca6ea1SDimitry Andric   // delete those w/o users
474*0fca6ea1SDimitry Andric   for (;;) {
475*0fca6ea1SDimitry Andric     auto *OldGEP = dyn_cast<GetElementPtrInst>(OldOp);
476*0fca6ea1SDimitry Andric     if (!OldGEP)
477*0fca6ea1SDimitry Andric       break;
478*0fca6ea1SDimitry Andric     if (!OldGEP->use_empty())
479*0fca6ea1SDimitry Andric       break;
480*0fca6ea1SDimitry Andric     OldOp = OldGEP->getPointerOperand();
481*0fca6ea1SDimitry Andric     OldGEP->eraseFromParent();
482*0fca6ea1SDimitry Andric   }
483*0fca6ea1SDimitry Andric }
484*0fca6ea1SDimitry Andric 
485*0fca6ea1SDimitry Andric // Support for BPF address spaces:
486*0fca6ea1SDimitry Andric // - for each function in the module M, update pointer operand of
487*0fca6ea1SDimitry Andric //   each memory access instruction (load/store/cmpxchg/atomicrmw)
488*0fca6ea1SDimitry Andric //   by casting it from non-zero address space to zero address space, e.g:
489*0fca6ea1SDimitry Andric //
490*0fca6ea1SDimitry Andric //   (load (ptr addrspace (N) %p) ...)
491*0fca6ea1SDimitry Andric //     -> (load (addrspacecast ptr addrspace (N) %p to ptr))
492*0fca6ea1SDimitry Andric //
493*0fca6ea1SDimitry Andric // - assign section with name .addr_space.N for globals defined in
494*0fca6ea1SDimitry Andric //   non-zero address space N
495*0fca6ea1SDimitry Andric bool BPFCheckAndAdjustIR::insertASpaceCasts(Module &M) {
496*0fca6ea1SDimitry Andric   bool Changed = false;
497*0fca6ea1SDimitry Andric   for (Function &F : M) {
498*0fca6ea1SDimitry Andric     DenseMap<Value *, Value *> CastsCache;
499*0fca6ea1SDimitry Andric     for (BasicBlock &BB : F) {
500*0fca6ea1SDimitry Andric       for (Instruction &I : BB) {
501*0fca6ea1SDimitry Andric         unsigned PtrOpNum;
502*0fca6ea1SDimitry Andric 
503*0fca6ea1SDimitry Andric         if (auto *LD = dyn_cast<LoadInst>(&I))
504*0fca6ea1SDimitry Andric           PtrOpNum = LD->getPointerOperandIndex();
505*0fca6ea1SDimitry Andric         else if (auto *ST = dyn_cast<StoreInst>(&I))
506*0fca6ea1SDimitry Andric           PtrOpNum = ST->getPointerOperandIndex();
507*0fca6ea1SDimitry Andric         else if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(&I))
508*0fca6ea1SDimitry Andric           PtrOpNum = CmpXchg->getPointerOperandIndex();
509*0fca6ea1SDimitry Andric         else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
510*0fca6ea1SDimitry Andric           PtrOpNum = RMW->getPointerOperandIndex();
511*0fca6ea1SDimitry Andric         else
512*0fca6ea1SDimitry Andric           continue;
513*0fca6ea1SDimitry Andric 
514*0fca6ea1SDimitry Andric         aspaceWrapOperand(CastsCache, &I, PtrOpNum);
515*0fca6ea1SDimitry Andric       }
516*0fca6ea1SDimitry Andric     }
517*0fca6ea1SDimitry Andric     Changed |= !CastsCache.empty();
518*0fca6ea1SDimitry Andric   }
519*0fca6ea1SDimitry Andric   // Merge all globals within same address space into single
520*0fca6ea1SDimitry Andric   // .addr_space.<addr space no> section
521*0fca6ea1SDimitry Andric   for (GlobalVariable &G : M.globals()) {
522*0fca6ea1SDimitry Andric     if (G.getAddressSpace() == 0 || G.hasSection())
523*0fca6ea1SDimitry Andric       continue;
524*0fca6ea1SDimitry Andric     SmallString<16> SecName;
525*0fca6ea1SDimitry Andric     raw_svector_ostream OS(SecName);
526*0fca6ea1SDimitry Andric     OS << ".addr_space." << G.getAddressSpace();
527*0fca6ea1SDimitry Andric     G.setSection(SecName);
528*0fca6ea1SDimitry Andric     // Prevent having separate section for constants
529*0fca6ea1SDimitry Andric     G.setConstant(false);
530*0fca6ea1SDimitry Andric   }
531*0fca6ea1SDimitry Andric   return Changed;
532*0fca6ea1SDimitry Andric }
533*0fca6ea1SDimitry Andric 
534e8d8bef9SDimitry Andric bool BPFCheckAndAdjustIR::adjustIR(Module &M) {
535349cc55cSDimitry Andric   bool Changed = removePassThroughBuiltin(M);
536349cc55cSDimitry Andric   Changed = removeCompareBuiltin(M) || Changed;
53706c3fb27SDimitry Andric   Changed = sinkMinMax(M) || Changed;
5385f757f3fSDimitry Andric   Changed = removeGEPBuiltins(M) || Changed;
539*0fca6ea1SDimitry Andric   Changed = insertASpaceCasts(M) || Changed;
540349cc55cSDimitry Andric   return Changed;
541e8d8bef9SDimitry Andric }
542e8d8bef9SDimitry Andric 
543e8d8bef9SDimitry Andric bool BPFCheckAndAdjustIR::runOnModule(Module &M) {
544e8d8bef9SDimitry Andric   checkIR(M);
545e8d8bef9SDimitry Andric   return adjustIR(M);
546e8d8bef9SDimitry Andric }
547