xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp (revision 19261079b74319502c6ffa1249920079f0f69a72)
1 //===- PoisonChecking.cpp - -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implements a transform pass which instruments IR such that poison semantics
10 // are made explicit.  That is, it provides a (possibly partial) executable
11 // semantics for every instruction w.r.t. poison as specified in the LLVM
12 // LangRef.  There are obvious parallels to the sanitizer tools, but this pass
13 // is focused purely on the semantics of LLVM IR, not any particular source
14 // language.   If you're looking for something to see if your C/C++ contains
15 // UB, this is not it.
16 //
17 // The rewritten semantics of each instruction will include the following
18 // components:
19 //
20 // 1) The original instruction, unmodified.
21 // 2) A propagation rule which translates dynamic information about the poison
22 //    state of each input to whether the dynamic output of the instruction
23 //    produces poison.
24 // 3) A creation rule which validates any poison producing flags on the
25 //    instruction itself (e.g. checks for overflow on nsw).
26 // 4) A check rule which traps (to a handler function) if this instruction must
27 //    execute undefined behavior given the poison state of it's inputs.
28 //
29 // This is a must analysis based transform; that is, the resulting code may
30 // produce a false negative result (not report UB when actually exists
31 // according to the LangRef spec), but should never produce a false positive
32 // (report UB where it doesn't exist).
33 //
34 // Use cases for this pass include:
35 // - Understanding (and testing!) the implications of the definition of poison
36 //   from the LangRef.
37 // - Validating the output of a IR fuzzer to ensure that all programs produced
38 //   are well defined on the specific input used.
39 // - Finding/confirming poison specific miscompiles by checking the poison
40 //   status of an input/IR pair is the same before and after an optimization
41 //   transform.
42 // - Checking that a bugpoint reduction does not introduce UB which didn't
43 //   exist in the original program being reduced.
44 //
45 // The major sources of inaccuracy are currently:
46 // - Most validation rules not yet implemented for instructions with poison
47 //   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
48 // - UB which is control dependent on a branch on poison is not yet
49 //   reported. Currently, only data flow dependence is modeled.
50 // - Poison which is propagated through memory is not modeled.  As such,
51 //   storing poison to memory and then reloading it will cause a false negative
52 //   as we consider the reloaded value to not be poisoned.
53 // - Poison propagation across function boundaries is not modeled.  At the
54 //   moment, all arguments and return values are assumed not to be poison.
55 // - Undef is not modeled.  In particular, the optimizer's freedom to pick
56 //   concrete values for undef bits so as to maximize potential for producing
57 //   poison is not modeled.
58 //
59 //===----------------------------------------------------------------------===//
60 
61 #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
62 #include "llvm/ADT/DenseMap.h"
63 #include "llvm/ADT/Statistic.h"
64 #include "llvm/Analysis/MemoryBuiltins.h"
65 #include "llvm/Analysis/ValueTracking.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/InstVisitor.h"
68 #include "llvm/IR/IntrinsicInst.h"
69 #include "llvm/IR/PatternMatch.h"
70 #include "llvm/Support/CommandLine.h"
71 #include "llvm/Support/Debug.h"
72 
73 using namespace llvm;
74 
75 #define DEBUG_TYPE "poison-checking"
76 
77 static cl::opt<bool>
78 LocalCheck("poison-checking-function-local",
79            cl::init(false),
80            cl::desc("Check that returns are non-poison (for testing)"));
81 
82 
83 static bool isConstantFalse(Value* V) {
84   assert(V->getType()->isIntegerTy(1));
85   if (auto *CI = dyn_cast<ConstantInt>(V))
86     return CI->isZero();
87   return false;
88 }
89 
90 static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
91   if (Ops.size() == 0)
92     return B.getFalse();
93   unsigned i = 0;
94   for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
95   if (i == Ops.size())
96     return B.getFalse();
97   Value *Accum = Ops[i++];
98   for (; i < Ops.size(); i++)
99     if (!isConstantFalse(Ops[i]))
100       Accum = B.CreateOr(Accum, Ops[i]);
101   return Accum;
102 }
103 
104 static void generateCreationChecksForBinOp(Instruction &I,
105                                            SmallVectorImpl<Value*> &Checks) {
106   assert(isa<BinaryOperator>(I));
107 
108   IRBuilder<> B(&I);
109   Value *LHS = I.getOperand(0);
110   Value *RHS = I.getOperand(1);
111   switch (I.getOpcode()) {
112   default:
113     return;
114   case Instruction::Add: {
115     if (I.hasNoSignedWrap()) {
116       auto *OverflowOp =
117         B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
118       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
119     }
120     if (I.hasNoUnsignedWrap()) {
121       auto *OverflowOp =
122         B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
123       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
124     }
125     break;
126   }
127   case Instruction::Sub: {
128     if (I.hasNoSignedWrap()) {
129       auto *OverflowOp =
130         B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
131       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
132     }
133     if (I.hasNoUnsignedWrap()) {
134       auto *OverflowOp =
135         B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
136       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
137     }
138     break;
139   }
140   case Instruction::Mul: {
141     if (I.hasNoSignedWrap()) {
142       auto *OverflowOp =
143         B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
144       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
145     }
146     if (I.hasNoUnsignedWrap()) {
147       auto *OverflowOp =
148         B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
149       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
150     }
151     break;
152   }
153   case Instruction::UDiv: {
154     if (I.isExact()) {
155       auto *Check =
156         B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
157                      ConstantInt::get(LHS->getType(), 0));
158       Checks.push_back(Check);
159     }
160     break;
161   }
162   case Instruction::SDiv: {
163     if (I.isExact()) {
164       auto *Check =
165         B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
166                      ConstantInt::get(LHS->getType(), 0));
167       Checks.push_back(Check);
168     }
169     break;
170   }
171   case Instruction::AShr:
172   case Instruction::LShr:
173   case Instruction::Shl: {
174     Value *ShiftCheck =
175       B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
176                    ConstantInt::get(RHS->getType(),
177                                     LHS->getType()->getScalarSizeInBits()));
178     Checks.push_back(ShiftCheck);
179     break;
180   }
181   };
182 }
183 
184 /// Given an instruction which can produce poison on non-poison inputs
185 /// (i.e. canCreatePoison returns true), generate runtime checks to produce
186 /// boolean indicators of when poison would result.
187 static void generateCreationChecks(Instruction &I,
188                                    SmallVectorImpl<Value*> &Checks) {
189   IRBuilder<> B(&I);
190   if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
191     generateCreationChecksForBinOp(I, Checks);
192 
193   // Handle non-binops separately
194   switch (I.getOpcode()) {
195   default:
196     // Note there are a couple of missing cases here, once implemented, this
197     // should become an llvm_unreachable.
198     break;
199   case Instruction::ExtractElement: {
200     Value *Vec = I.getOperand(0);
201     auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
202     if (!VecVTy)
203       break;
204     Value *Idx = I.getOperand(1);
205     unsigned NumElts = VecVTy->getNumElements();
206     Value *Check =
207       B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
208                    ConstantInt::get(Idx->getType(), NumElts));
209     Checks.push_back(Check);
210     break;
211   }
212   case Instruction::InsertElement: {
213     Value *Vec = I.getOperand(0);
214     auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
215     if (!VecVTy)
216       break;
217     Value *Idx = I.getOperand(2);
218     unsigned NumElts = VecVTy->getNumElements();
219     Value *Check =
220       B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
221                    ConstantInt::get(Idx->getType(), NumElts));
222     Checks.push_back(Check);
223     break;
224   }
225   };
226 }
227 
228 static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
229   auto Itr = ValToPoison.find(V);
230   if (Itr != ValToPoison.end())
231     return Itr->second;
232   if (isa<Constant>(V)) {
233     return ConstantInt::getFalse(V->getContext());
234   }
235   // Return false for unknwon values - this implements a non-strict mode where
236   // unhandled IR constructs are simply considered to never produce poison.  At
237   // some point in the future, we probably want a "strict mode" for testing if
238   // nothing else.
239   return ConstantInt::getFalse(V->getContext());
240 }
241 
242 static void CreateAssert(IRBuilder<> &B, Value *Cond) {
243   assert(Cond->getType()->isIntegerTy(1));
244   if (auto *CI = dyn_cast<ConstantInt>(Cond))
245     if (CI->isAllOnesValue())
246       return;
247 
248   Module *M = B.GetInsertBlock()->getModule();
249   M->getOrInsertFunction("__poison_checker_assert",
250                          Type::getVoidTy(M->getContext()),
251                          Type::getInt1Ty(M->getContext()));
252   Function *TrapFunc = M->getFunction("__poison_checker_assert");
253   B.CreateCall(TrapFunc, Cond);
254 }
255 
256 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
257   assert(Cond->getType()->isIntegerTy(1));
258   CreateAssert(B, B.CreateNot(Cond));
259 }
260 
261 static bool rewrite(Function &F) {
262   auto * const Int1Ty = Type::getInt1Ty(F.getContext());
263 
264   DenseMap<Value *, Value *> ValToPoison;
265 
266   for (BasicBlock &BB : F)
267     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
268       auto *OldPHI = cast<PHINode>(&*I);
269       auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());
270       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
271         NewPHI->addIncoming(UndefValue::get(Int1Ty),
272                             OldPHI->getIncomingBlock(i));
273       NewPHI->insertBefore(OldPHI);
274       ValToPoison[OldPHI] = NewPHI;
275     }
276 
277   for (BasicBlock &BB : F)
278     for (Instruction &I : BB) {
279       if (isa<PHINode>(I)) continue;
280 
281       IRBuilder<> B(cast<Instruction>(&I));
282 
283       // Note: There are many more sources of documented UB, but this pass only
284       // attempts to find UB triggered by propagation of poison.
285       SmallPtrSet<const Value *, 4> NonPoisonOps;
286       getGuaranteedNonPoisonOps(&I, NonPoisonOps);
287       for (const Value *Op : NonPoisonOps)
288         CreateAssertNot(B, getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
289 
290       if (LocalCheck)
291         if (auto *RI = dyn_cast<ReturnInst>(&I))
292           if (RI->getNumOperands() != 0) {
293             Value *Op = RI->getOperand(0);
294             CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
295           }
296 
297       SmallVector<Value*, 4> Checks;
298       if (propagatesPoison(cast<Operator>(&I)))
299         for (Value *V : I.operands())
300           Checks.push_back(getPoisonFor(ValToPoison, V));
301 
302       if (canCreatePoison(cast<Operator>(&I)))
303         generateCreationChecks(I, Checks);
304       ValToPoison[&I] = buildOrChain(B, Checks);
305     }
306 
307   for (BasicBlock &BB : F)
308     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
309       auto *OldPHI = cast<PHINode>(&*I);
310       if (!ValToPoison.count(OldPHI))
311         continue; // skip the newly inserted phis
312       auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
313       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
314         auto *OldVal = OldPHI->getIncomingValue(i);
315         NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
316       }
317     }
318   return true;
319 }
320 
321 
322 PreservedAnalyses PoisonCheckingPass::run(Module &M,
323                                           ModuleAnalysisManager &AM) {
324   bool Changed = false;
325   for (auto &F : M)
326     Changed |= rewrite(F);
327 
328   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
329 }
330 
331 PreservedAnalyses PoisonCheckingPass::run(Function &F,
332                                           FunctionAnalysisManager &AM) {
333   return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
334 }
335 
336 /* Major TODO Items:
337    - Control dependent poison UB
338    - Strict mode - (i.e. must analyze every operand)
339      - Poison through memory
340      - Function ABIs
341      - Full coverage of intrinsics, etc.. (ouch)
342 
343    Instructions w/Unclear Semantics:
344    - shufflevector - It would seem reasonable for an out of bounds mask element
345      to produce poison, but the LangRef does not state.
346    - all binary ops w/vector operands - The likely interpretation would be that
347      any element overflowing should produce poison for the entire result, but
348      the LangRef does not state.
349    - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems
350      strange that only certian flags should be documented as producing poison.
351 
352    Cases of clear poison semantics not yet implemented:
353    - Exact flags on ashr/lshr produce poison
354    - NSW/NUW flags on shl produce poison
355    - Inbounds flag on getelementptr produce poison
356    - fptosi/fptoui (out of bounds input) produce poison
357    - Scalable vector types for insertelement/extractelement
358    - Floating point binary ops w/fmf nnan/noinfs flags produce poison
359  */
360