//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
/// analysis.
///
/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
/// class of bugs on its own. Instead, it provides a generic dynamic data flow
/// analysis framework to be used by clients to help detect application-specific
/// issues within their own code.
///
/// The analysis is based on automatic propagation of data flow labels (also
/// known as taint labels) through a program as it performs computation.
///
/// There are two possible memory layouts. In the first one, each byte of
/// application memory is backed by a shadow memory byte. The shadow byte can
/// represent up to 8 labels. To enable this you must specify the
/// -dfsan-fast-8-labels flag. On Linux/x86_64, memory is then laid out as
/// follows:
///
/// +--------------------+ 0x800000000000 (top of memory)
/// | application memory |
/// +--------------------+ 0x700000008000 (kAppAddr)
/// |                    |
/// |       unused       |
/// |                    |
/// +--------------------+ 0x300200000000 (kUnusedAddr)
/// |    union table     |
/// +--------------------+ 0x300000000000 (kUnionTableAddr)
/// |       origin       |
/// +--------------------+ 0x200000008000 (kOriginAddr)
/// |   shadow memory    |
/// +--------------------+ 0x100000008000 (kShadowAddr)
/// |       unused       |
/// +--------------------+ 0x000000010000
/// | reserved by kernel |
/// +--------------------+ 0x000000000000
///
///
/// In the second memory layout, each byte of application memory is backed by
/// two bytes of shadow memory which hold the label. That means we can represent
/// either 16 labels (with -dfsan-fast-16-labels flag) or 2^16 labels (on the
/// default legacy mode) per byte. On Linux/x86_64, memory is then laid out as
/// follows:
///
/// +--------------------+ 0x800000000000 (top of memory)
/// | application memory |
/// +--------------------+ 0x700000008000 (kAppAddr)
/// |                    |
/// |       unused       |
/// |                    |
/// +--------------------+ 0x300200000000 (kUnusedAddr)
/// |    union table     |
/// +--------------------+ 0x300000000000 (kUnionTableAddr)
/// |       origin       |
/// +--------------------+ 0x200000008000 (kOriginAddr)
/// |   shadow memory    |
/// +--------------------+ 0x000000010000 (kShadowAddr)
/// | reserved by kernel |
/// +--------------------+ 0x000000000000
///
///
/// To derive a shadow memory address from an application memory address,
/// bits 44-46 are cleared to bring the address into the range
/// [0x000000008000,0x100000000000). Then the address is shifted left by 1 to
/// account for the double byte representation of shadow labels and move the
/// address into the shadow memory range. See the function
/// DataFlowSanitizer::getShadowAddress below.
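///
/// For example, in the second (two-bytes-per-label) layout, the application
/// address 0x700000008000 (kAppAddr) has bits 44-46 cleared, giving
/// 0x000000008000, which shifted left by 1 yields the shadow address
/// 0x000000010000 (kShadowAddr). In the fast-8-labels layout, only bits 45-46
/// are cleared and no shift is applied (each application byte has a one-byte
/// shadow), so 0x700000008000 maps to 0x100000008000 (kShadowAddr); see the
/// per-target masks chosen in DataFlowSanitizer::init.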
///
/// For more information, please refer to the design document:
/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

// This must be consistent with ShadowWidthBits.
static const Align ShadowTLSAlignment = Align(2);

static const Align MinOriginAlignment = Align(4);

// The size of TLS variables. These constants must be kept in sync with the ones
// in dfsan.cpp.
static const unsigned ArgTLSSize = 800;
static const unsigned RetvalTLSSize = 800;

// External symbol to be used when generating the shadow address for
// architectures with multiple VMAs. Instead of using a constant integer,
// the runtime will set the external mask based on the VMA range.
const char DFSanExternShadowPtrMask[] = "__dfsan_shadow_ptr_mask";

// The -dfsan-preserve-alignment flag controls whether this pass assumes that
// alignment requirements provided by the input IR are correct. For example,
// if the input IR contains a load with alignment 8, this flag will cause
// the shadow load to have alignment 16. This flag is disabled by default as
// we have unfortunately encountered too much code (including Clang itself;
// see PR14291) which performs misaligned access.
static cl::opt<bool> ClPreserveAlignment(
    "dfsan-preserve-alignment",
    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
    cl::init(false));

// The ABI list files control how shadow parameters are passed. The pass treats
// every function labelled "uninstrumented" in the ABI list file as conforming
// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
// additional annotations for those functions, a call to one of those functions
// will produce a warning message, as the labelling behaviour of the function is
// unknown. The other supported annotations are "functional" and "discard",
// which are described below under DataFlowSanitizer::WrapperKind.
static cl::list<std::string> ClABIListFiles(
    "dfsan-abilist",
    cl::desc("File listing native ABI functions and how the pass treats them"),
    cl::Hidden);

// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented
// functions (see DataFlowSanitizer::InstrumentedABI below).
static cl::opt<bool>
    ClArgsABI("dfsan-args-abi",
              cl::desc("Use the argument ABI rather than the TLS ABI"),
              cl::Hidden);

// Controls whether the pass includes or ignores the labels of pointers in load
// instructions.
static cl::opt<bool> ClCombinePointerLabelsOnLoad(
    "dfsan-combine-pointer-labels-on-load",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "loading from memory."),
    cl::Hidden, cl::init(true));

// Controls whether the pass includes or ignores the labels of pointers in
// store instructions.
static cl::opt<bool> ClCombinePointerLabelsOnStore(
    "dfsan-combine-pointer-labels-on-store",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "storing in memory."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> ClDebugNonzeroLabels(
    "dfsan-debug-nonzero-labels",
    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
             "load or return with a nonzero label"),
    cl::Hidden);

// Experimental feature that inserts callbacks for certain data events.
// Currently callbacks are only inserted for loads, stores, memory transfers
// (i.e. memcpy and memmove), and comparisons.
//
// If this flag is set to true, the user must provide definitions for the
// following callback functions:
//   void __dfsan_load_callback(dfsan_label Label, void* addr);
//   void __dfsan_store_callback(dfsan_label Label, void* addr);
//   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
//   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
static cl::opt<bool> ClEventCallbacks(
    "dfsan-event-callbacks",
    cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
    cl::Hidden, cl::init(false));
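
// For illustration, a client enabling this flag might define one of the
// callbacks roughly as follows (a sketch only; dfsan_label is provided by
// <sanitizer/dfsan_interface.h>):
//
//   #include <cstdio>
//   #include <sanitizer/dfsan_interface.h>
//
//   extern "C" void __dfsan_load_callback(dfsan_label Label, void *Addr) {
//     if (Label != 0)
//       std::fprintf(stderr, "tainted load (label %u) at %p\n",
//                    (unsigned)Label, Addr);
//   }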

// Use a distinct bit for each base label, enabling faster unions with less
// instrumentation. Limits the max number of base labels to 16.
static cl::opt<bool> ClFast16Labels(
    "dfsan-fast-16-labels",
    cl::desc("Use more efficient instrumentation, limiting the number of "
             "labels to 16."),
    cl::Hidden, cl::init(false));

// Use a distinct bit for each base label, enabling faster unions with less
// instrumentation. Limits the max number of base labels to 8.
static cl::opt<bool> ClFast8Labels(
    "dfsan-fast-8-labels",
    cl::desc("Use more efficient instrumentation, limiting the number of "
             "labels to 8."),
    cl::Hidden, cl::init(false));

// Controls whether the pass tracks the control flow of select instructions.
static cl::opt<bool> ClTrackSelectControlFlow(
    "dfsan-track-select-control-flow",
    cl::desc("Propagate labels from condition values of select instructions "
             "to results."),
    cl::Hidden, cl::init(true));

// TODO: This default value follows MSan. DFSan may use a different value.
static cl::opt<int> ClInstrumentWithCallThreshold(
    "dfsan-instrument-with-call-threshold",
    cl::desc("If the function being instrumented requires more than "
             "this number of origin stores, use callbacks instead of "
             "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(3500));

// Controls how to track origins.
// * 0: do not track origins.
// * 1: track origins at memory store operations.
// * 2: track origins at memory load and store operations.
// TODO: track callsites.
static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
                                   cl::desc("Track origins of labels"),
                                   cl::Hidden, cl::init(0));
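
// For example, store-level origin tracking can be requested by passing
// -mllvm -dfsan-track-origins=1 alongside -fsanitize=dataflow (illustrative
// usage; exact driver plumbing may vary by clang version).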

static StringRef getGlobalTypeString(const GlobalValue &G) {
  // Types of GlobalVariables are always pointer types.
  Type *GType = G.getValueType();
  // For now we support excluding struct types only.
  if (StructType *SGType = dyn_cast<StructType>(GType)) {
    if (!SGType->isLiteral())
      return SGType->getName();
  }
  return "<unknown type>";
}

namespace {

class DFSanABIList {
  std::unique_ptr<SpecialCaseList> SCL;

public:
  DFSanABIList() = default;

  void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }

  /// Returns whether either this function or its source file is listed in the
  /// given category.
  bool isIn(const Function &F, StringRef Category) const {
    return isIn(*F.getParent(), Category) ||
           SCL->inSection("dataflow", "fun", F.getName(), Category);
  }

  /// Returns whether this global alias is listed in the given category.
  ///
  /// If GA aliases a function, the alias's name is matched as a function name
  /// would be. Similarly, aliases of globals are matched like globals.
  bool isIn(const GlobalAlias &GA, StringRef Category) const {
    if (isIn(*GA.getParent(), Category))
      return true;

    if (isa<FunctionType>(GA.getValueType()))
      return SCL->inSection("dataflow", "fun", GA.getName(), Category);

    return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
           SCL->inSection("dataflow", "type", getGlobalTypeString(GA),
                          Category);
  }

  /// Returns whether this module is listed in the given category.
  bool isIn(const Module &M, StringRef Category) const {
    return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
  }
};

/// TransformedFunction is used to express the result of transforming one
/// function type into another. This struct is immutable. It holds metadata
/// useful for updating calls of the old function to the new type.
struct TransformedFunction {
  TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType,
                      std::vector<unsigned> ArgumentIndexMapping)
      : OriginalType(OriginalType), TransformedType(TransformedType),
        ArgumentIndexMapping(ArgumentIndexMapping) {}

  // Disallow copies.
  TransformedFunction(const TransformedFunction &) = delete;
  TransformedFunction &operator=(const TransformedFunction &) = delete;

  // Allow moves.
  TransformedFunction(TransformedFunction &&) = default;
  TransformedFunction &operator=(TransformedFunction &&) = default;

  /// Type of the function before the transformation.
  FunctionType *OriginalType;

  /// Type of the function after the transformation.
  FunctionType *TransformedType;

  /// Transforming a function may change the position of arguments. This
  /// member records the mapping from each argument's old position to its new
  /// position. Argument positions are zero-indexed. If the transformation
  /// from F to F' made the first argument of F into the third argument of F',
  /// then ArgumentIndexMapping[0] will equal 2.
  std::vector<unsigned> ArgumentIndexMapping;
};

/// Given function attributes from a call site for the original function,
/// return function attributes appropriate for a call to the transformed
/// function.
AttributeList
transformFunctionAttributes(const TransformedFunction &TransformedFunction,
                            LLVMContext &Ctx, AttributeList CallSiteAttrs) {

  // Construct a vector of AttributeSet for each function argument.
  std::vector<llvm::AttributeSet> ArgumentAttributes(
      TransformedFunction.TransformedType->getNumParams());

  // Copy attributes from the parameter of the original function to the
  // transformed version.  'ArgumentIndexMapping' holds the mapping from
  // old argument position to new.
  for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
       I < IE; ++I) {
    unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
    ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttributes(I);
  }

  // Copy annotations on varargs arguments.
  for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
                IE = CallSiteAttrs.getNumAttrSets();
       I < IE; ++I) {
    ArgumentAttributes.push_back(CallSiteAttrs.getParamAttributes(I));
  }

  return AttributeList::get(Ctx, CallSiteAttrs.getFnAttributes(),
                            CallSiteAttrs.getRetAttributes(),
                            llvm::makeArrayRef(ArgumentAttributes));
}

class DataFlowSanitizer {
  friend struct DFSanFunction;
  friend class DFSanVisitor;

  enum {
    OriginWidthBits = 32,
    OriginWidthBytes = OriginWidthBits / 8
  };

  /// Which ABI should be used for instrumented functions?
  enum InstrumentedABI {
    /// Argument and return value labels are passed through additional
    /// arguments and by modifying the return type.
    IA_Args,

    /// Argument and return value labels are passed through TLS variables
    /// __dfsan_arg_tls and __dfsan_retval_tls.
    IA_TLS
  };

  /// How should calls to uninstrumented functions be handled?
  enum WrapperKind {
    /// This function is present in an uninstrumented form but we don't know
    /// how it should be handled. Print a warning and call the function anyway.
    /// Don't label the return value.
    WK_Warning,

    /// This function does not write to (user-accessible) memory, and its return
    /// value is unlabelled.
    WK_Discard,

    /// This function does not write to (user-accessible) memory, and the label
    /// of its return value is the union of the label of its arguments.
    WK_Functional,

    /// Instead of calling the function, a custom wrapper __dfsw_F is called,
    /// where F is the name of the function. This function may wrap the
    /// original function or provide its own implementation. This is similar to
    /// the IA_Args ABI, except that IA_Args uses a struct return type to
    /// pass the return value shadow in a register, while WK_Custom uses an
    /// extra pointer argument to return the shadow. This allows the wrapped
    /// form of the function type to be expressed in C.
    WK_Custom
  };

  unsigned ShadowWidthBits;
  unsigned ShadowWidthBytes;

  Module *Mod;
  LLVMContext *Ctx;
  Type *Int8Ptr;
  IntegerType *OriginTy;
  PointerType *OriginPtrTy;
  ConstantInt *OriginBase;
  ConstantInt *ZeroOrigin;
  /// The shadow type for all primitive types and vector types.
  IntegerType *PrimitiveShadowTy;
  PointerType *PrimitiveShadowPtrTy;
  IntegerType *IntptrTy;
  ConstantInt *ZeroPrimitiveShadow;
  ConstantInt *ShadowPtrMask;
  ConstantInt *ShadowPtrMul;
  Constant *ArgTLS;
  ArrayType *ArgOriginTLSTy;
  Constant *ArgOriginTLS;
  Constant *RetvalTLS;
  Constant *RetvalOriginTLS;
  Constant *ExternalShadowMask;
  FunctionType *DFSanUnionFnTy;
  FunctionType *DFSanUnionLoadFnTy;
  FunctionType *DFSanLoadLabelAndOriginFnTy;
  FunctionType *DFSanUnimplementedFnTy;
  FunctionType *DFSanSetLabelFnTy;
  FunctionType *DFSanNonzeroLabelFnTy;
  FunctionType *DFSanVarargWrapperFnTy;
  FunctionType *DFSanCmpCallbackFnTy;
  FunctionType *DFSanLoadStoreCallbackFnTy;
  FunctionType *DFSanMemTransferCallbackFnTy;
  FunctionType *DFSanChainOriginFnTy;
  FunctionType *DFSanChainOriginIfTaintedFnTy;
  FunctionType *DFSanMemOriginTransferFnTy;
  FunctionType *DFSanMaybeStoreOriginFnTy;
  FunctionCallee DFSanUnionFn;
  FunctionCallee DFSanCheckedUnionFn;
  FunctionCallee DFSanUnionLoadFn;
  FunctionCallee DFSanUnionLoadFastLabelsFn;
  FunctionCallee DFSanLoadLabelAndOriginFn;
  FunctionCallee DFSanUnimplementedFn;
  FunctionCallee DFSanSetLabelFn;
  FunctionCallee DFSanNonzeroLabelFn;
  FunctionCallee DFSanVarargWrapperFn;
  FunctionCallee DFSanLoadCallbackFn;
  FunctionCallee DFSanStoreCallbackFn;
  FunctionCallee DFSanMemTransferCallbackFn;
  FunctionCallee DFSanCmpCallbackFn;
  FunctionCallee DFSanChainOriginFn;
  FunctionCallee DFSanChainOriginIfTaintedFn;
  FunctionCallee DFSanMemOriginTransferFn;
  FunctionCallee DFSanMaybeStoreOriginFn;
  SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
  MDNode *ColdCallWeights;
  MDNode *OriginStoreWeights;
  DFSanABIList ABIList;
  DenseMap<Value *, Function *> UnwrappedFnMap;
  AttrBuilder ReadOnlyNoneAttrs;
  bool DFSanRuntimeShadowMask = false;

  Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
  Value *getShadowAddress(Value *Addr, Instruction *Pos);
  Value *getShadowAddress(Value *Addr, Instruction *Pos, Value *ShadowOffset);
  std::pair<Value *, Value *>
  getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
  bool isInstrumented(const Function *F);
  bool isInstrumented(const GlobalAlias *GA);
  FunctionType *getArgsFunctionType(FunctionType *T);
  FunctionType *getTrampolineFunctionType(FunctionType *T);
  TransformedFunction getCustomFunctionType(FunctionType *T);
  InstrumentedABI getInstrumentedABI();
  WrapperKind getWrapperKind(Function *F);
  void addGlobalNamePrefix(GlobalValue *GV);
  Function *buildWrapperFunction(Function *F, StringRef NewFName,
                                 GlobalValue::LinkageTypes NewFLink,
                                 FunctionType *NewFT);
  Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
  void initializeCallbackFunctions(Module &M);
  void initializeRuntimeFunctions(Module &M);
  void injectMetadataGlobals(Module &M);

  bool init(Module &M);

  /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
  /// from it. Returns the origin's loaded value.
  Value *loadNextOrigin(Instruction *Pos, Align OriginAlign,
                        Value **OriginAddr);

  /// Returns whether fast8 or fast16 mode has been specified.
  bool hasFastLabelsEnabled();

  /// Returns whether the given load byte size is amenable to inlined
  /// optimization patterns.
  bool hasLoadSizeForFastPath(uint64_t Size);

  /// Returns whether the pass tracks origins. Supported only in fast16 mode
  /// with the TLS ABI.
  bool shouldTrackOrigins();

  /// Returns whether the pass tracks labels for struct fields and array
  /// indices. Supported only in fast16 mode with the TLS ABI.
  bool shouldTrackFieldsAndIndices();

  /// Returns a zero constant with the shadow type of OrigTy.
  ///
  /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2),...}
  /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
  /// getZeroShadow(other type) = i16(0)
  ///
  /// Note that a zero shadow is always i16(0) when shouldTrackFieldsAndIndices
  /// returns false.
  Constant *getZeroShadow(Type *OrigTy);
  /// Returns a zero constant with the shadow type of V's type.
  Constant *getZeroShadow(Value *V);

  /// Checks if V is a zero shadow.
  bool isZeroShadow(Value *V);

  /// Returns the shadow type of OrigTy.
  ///
  /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
  /// getShadowTy([n x T]) = [n x getShadowTy(T)]
  /// getShadowTy(other type) = i16
  ///
  /// Note that a shadow type is always i16 when shouldTrackFieldsAndIndices
  /// returns false.
  Type *getShadowTy(Type *OrigTy);
  /// Returns the shadow type of V's type.
  Type *getShadowTy(Value *V);

  const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;

public:
  DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);

  bool runImpl(Module &M);
};

struct DFSanFunction {
  DataFlowSanitizer &DFS;
  Function *F;
  DominatorTree DT;
  DataFlowSanitizer::InstrumentedABI IA;
  bool IsNativeABI;
  AllocaInst *LabelReturnAlloca = nullptr;
  AllocaInst *OriginReturnAlloca = nullptr;
  DenseMap<Value *, Value *> ValShadowMap;
  DenseMap<Value *, Value *> ValOriginMap;
  DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
  DenseMap<AllocaInst *, AllocaInst *> AllocaOriginMap;

  struct PHIFixupElement {
    PHINode *Phi;
    PHINode *ShadowPhi;
    PHINode *OriginPhi;
  };
  std::vector<PHIFixupElement> PHIFixups;

  DenseSet<Instruction *> SkipInsts;
  std::vector<Value *> NonZeroChecks;
  bool AvoidNewBlocks;

  struct CachedShadow {
    BasicBlock *Block; // The block where Shadow is defined.
    Value *Shadow;
  };
  /// Maps a value to its latest shadow value in terms of the dominator tree.
  DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
  /// Maps a value to the latest collapsed shadow value it was converted to,
  /// in terms of the dominator tree. When ClDebugNonzeroLabels is on, this
  /// cache is used in a post-processing step where CFG blocks are split, so it
  /// does not cache per BasicBlock like CachedShadows, but instead relies on
  /// domination between values.
  DenseMap<Value *, Value *> CachedCollapsedShadows;
  DenseMap<Value *, std::set<Value *>> ShadowElements;

  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
      : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {
    DT.recalculate(*F);
    // FIXME: Need to track down the register allocator issue which causes poor
    // performance in pathological cases with large numbers of basic blocks.
    AvoidNewBlocks = F->size() > 1000;
  }

  /// Computes the shadow address for a given function argument.
  ///
  /// Shadow = ArgTLS+ArgOffset.
  Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);

  /// Computes the shadow address for a return value.
  Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);

  /// Computes the origin address for a given function argument.
  ///
  /// Origin = ArgOriginTLS[ArgNo].
  Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB);

  /// Computes the origin address for a return value.
  Value *getRetvalOriginTLS();

  Value *getOrigin(Value *V);
  void setOrigin(Instruction *I, Value *Origin);
  /// Generates IR to compute the origin of the last operand with a taint label.
  Value *combineOperandOrigins(Instruction *Inst);
  /// Before the instruction Pos, generates IR to compute the last origin with
  /// a taint label. Labels and origins are taken from the vectors Shadows and
  /// Origins, respectively. The generated IR is like
  ///   Sn-1 != Zero ? On-1: ... S2 != Zero ? O2: S1 != Zero ? O1: O0
  /// When Zero is nullptr, ZeroPrimitiveShadow is used; otherwise Zero may be
  /// a zero constant of another bit width.
  Value *combineOrigins(const std::vector<Value *> &Shadows,
                        const std::vector<Value *> &Origins, Instruction *Pos,
                        ConstantInt *Zero = nullptr);

  Value *getShadow(Value *V);
  void setShadow(Instruction *I, Value *Shadow);
  /// Generates IR to compute the union of the two given shadows, inserting it
  /// before Pos. The combined value has primitive type.
  Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
  /// Combines the shadow values of V1 and V2, then converts the combined value
  /// with primitive type into a shadow value with the original type T.
  Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                   Instruction *Pos);
  Value *combineOperandShadows(Instruction *Inst);

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  ///
  /// When tracking loads is enabled, the returned origin is a chain at the
  /// current stack if the returned shadow is tainted.
  std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               Instruction *Pos);

  void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                  Align InstAlignment, Value *PrimitiveShadow,
                                  Value *Origin, Instruction *Pos);
  /// Applies PrimitiveShadow to all primitive subtypes of T, returning
  /// the expanded shadow value.
  ///
  /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
  /// EFP([n x T], PS) = [n x EFP(T,PS)]
  /// EFP(other types, PS) = PS
  Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                   Instruction *Pos);
  /// Collapses Shadow into a single primitive shadow value, unioning all
  /// primitive shadow values in the process. Returns the final primitive
  /// shadow value.
  ///
  /// CTP({V1,V2, ...}) = UNION(CTP(V1),CTP(V2),...)
  /// CTP([V1,V2,...]) = UNION(CTP(V1),CTP(V2),...)
  /// CTP(V of other type) = V
  Value *collapseToPrimitiveShadow(Value *Shadow, Instruction *Pos);

  void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign,
                                Instruction *Pos);

  Align getShadowAlign(Align InstAlignment);

private:
  /// Collapses the shadow with aggregate type into a single primitive shadow
  /// value.
  template <class AggregateType>
  Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
                                 IRBuilder<> &IRB);

  Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);

  /// Returns the shadow value of an argument A.
  Value *getShadowForTLSArgument(Argument *A);

  /// The fast path of loading shadow in legacy mode.
  Value *loadLegacyShadowFast(Value *ShadowAddr, uint64_t Size,
                              Align ShadowAlign, Instruction *Pos);

  /// The fast path of loading shadow in fast-16-label mode.
  std::pair<Value *, Value *>
  loadFast16ShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
                       Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
                       Instruction *Pos);

  Align getOriginAlign(Align InstAlignment);

  /// Because 4 contiguous bytes share one 4-byte origin, the most accurate load
  /// is __dfsan_load_label_and_origin. This function returns the union of all
  /// labels and the origin of the first taint label. However this is an
  /// additional call with many instructions. To ensure common cases are fast,
  /// checks if it is possible to load labels and origins without using the
  /// callback function.
  ///
  /// When tracking load instructions, we always use
  /// __dfsan_load_label_and_origin to reduce code size.
  bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);

  /// Returns a chain at the current stack with previous origin V.
  Value *updateOrigin(Value *V, IRBuilder<> &IRB);

  /// Returns a chain at the current stack with previous origin V if Shadow is
  /// tainted.
  Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);

  /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
  /// Origin otherwise.
  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);

  /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
  /// Size).
  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
                   uint64_t StoreOriginSize, Align Alignment);

  /// Stores Origin in terms of its Shadow value.
  /// * Do not write origins for zero shadows because we do not trace origins
  ///   for untainted sinks.
  /// * Use __dfsan_maybe_store_origin if there are too many origin store
  ///   instrumentations.
  void storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, Value *Shadow,
                   Value *Origin, Value *StoreOriginAddr, Align InstAlignment);

  /// Convert a scalar value to an i1 by comparing with 0.
  Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");

  bool shouldInstrumentWithCall();

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  std::pair<Value *, Value *>
  loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
                                   Align InstAlignment, Instruction *Pos);
  int NumOriginStores = 0;
};

class DFSanVisitor : public InstVisitor<DFSanVisitor> {
public:
  DFSanFunction &DFSF;

  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}

  const DataLayout &getDataLayout() const {
    return DFSF.F->getParent()->getDataLayout();
  }

  // Combines shadow values and origins for all of I's operands.
  void visitInstOperands(Instruction &I);

  void visitUnaryOperator(UnaryOperator &UO);
  void visitBinaryOperator(BinaryOperator &BO);
  void visitCastInst(CastInst &CI);
  void visitCmpInst(CmpInst &CI);
  void visitGetElementPtrInst(GetElementPtrInst &GEPI);
  void visitLoadInst(LoadInst &LI);
  void visitStoreInst(StoreInst &SI);
  void visitAtomicRMWInst(AtomicRMWInst &I);
  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
  void visitReturnInst(ReturnInst &RI);
  void visitCallBase(CallBase &CB);
  void visitPHINode(PHINode &PN);
  void visitExtractElementInst(ExtractElementInst &I);
  void visitInsertElementInst(InsertElementInst &I);
  void visitShuffleVectorInst(ShuffleVectorInst &I);
  void visitExtractValueInst(ExtractValueInst &I);
  void visitInsertValueInst(InsertValueInst &I);
  void visitAllocaInst(AllocaInst &I);
  void visitSelectInst(SelectInst &I);
  void visitMemSetInst(MemSetInst &I);
  void visitMemTransferInst(MemTransferInst &I);

private:
  void visitCASOrRMW(Align InstAlignment, Instruction &I);

  // Returns false when this is an invoke of a custom function.
  bool visitWrappedCallBase(Function &F, CallBase &CB);

  // Combines origins for all of I's operands.
  void visitInstOperandOrigins(Instruction &I);

  void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);

  void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);
};

} // end anonymous namespace

DataFlowSanitizer::DataFlowSanitizer(
    const std::vector<std::string> &ABIListFiles) {
  if (ClFast8Labels && ClFast16Labels) {
    report_fatal_error(
        "cannot set both -dfsan-fast-8-labels and -dfsan-fast-16-labels");
  }

  ShadowWidthBits = ClFast8Labels ? 8 : 16;
  ShadowWidthBytes = ShadowWidthBits / 8;

  std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
  llvm::append_range(AllABIListFiles, ClABIListFiles);
  // FIXME: should we propagate vfs::FileSystem to this constructor?
  ABIList.set(
      SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
}

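// For example (with 16-bit shadows), i8* (i32, double) becomes
// { i8*, i16 } (i32, double, i16, i16); varargs functions additionally gain a
// trailing i16* pointing at the shadows of the variadic arguments.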
FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
  SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end());
  ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
  if (T->isVarArg())
    ArgTypes.push_back(PrimitiveShadowPtrTy);
  Type *RetType = T->getReturnType();
  if (!RetType->isVoidTy())
    RetType = StructType::get(RetType, PrimitiveShadowTy);
  return FunctionType::get(RetType, ArgTypes, T->isVarArg());
}

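// For example (with 16-bit shadows and origin tracking disabled), the
// trampoline type for i32 (i32) is i32 (i32 (i32)*, i32, i16, i16*): the
// original function pointer, the original argument, its shadow, and a pointer
// through which the return value's shadow is written back.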
FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
  assert(!T->isVarArg());
  SmallVector<Type *, 4> ArgTypes;
  ArgTypes.push_back(T->getPointerTo());
  ArgTypes.append(T->param_begin(), T->param_end());
  ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
  Type *RetType = T->getReturnType();
  if (!RetType->isVoidTy())
    ArgTypes.push_back(PrimitiveShadowPtrTy);

  if (shouldTrackOrigins()) {
    ArgTypes.append(T->getNumParams(), OriginTy);
    if (!RetType->isVoidTy())
      ArgTypes.push_back(OriginPtrTy);
  }

  return FunctionType::get(T->getReturnType(), ArgTypes, false);
}

TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
  SmallVector<Type *, 4> ArgTypes;

  // Some parameters of the custom function being constructed are
  // parameters of T.  Record the mapping from parameters of T to
  // parameters of the custom function, so that parameter attributes
  // at call sites can be updated.
  std::vector<unsigned> ArgumentIndexMapping;
  for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {
    Type *ParamType = T->getParamType(I);
    FunctionType *FT;
    if (isa<PointerType>(ParamType) &&
        (FT = dyn_cast<FunctionType>(ParamType->getPointerElementType()))) {
      ArgumentIndexMapping.push_back(ArgTypes.size());
      ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
      ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
    } else {
      ArgumentIndexMapping.push_back(ArgTypes.size());
      ArgTypes.push_back(ParamType);
    }
  }
  for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
    ArgTypes.push_back(PrimitiveShadowTy);
  if (T->isVarArg())
    ArgTypes.push_back(PrimitiveShadowPtrTy);
  Type *RetType = T->getReturnType();
  if (!RetType->isVoidTy())
    ArgTypes.push_back(PrimitiveShadowPtrTy);

  if (shouldTrackOrigins()) {
    for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
      ArgTypes.push_back(OriginTy);
    if (T->isVarArg())
      ArgTypes.push_back(OriginPtrTy);
    if (!RetType->isVoidTy())
      ArgTypes.push_back(OriginPtrTy);
  }

  return TransformedFunction(
      T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
      ArgumentIndexMapping);
}

bool DataFlowSanitizer::isZeroShadow(Value *V) {
  if (!shouldTrackFieldsAndIndices())
    return ZeroPrimitiveShadow == V;

  Type *T = V->getType();
  if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
      return CI->isZero();
    return false;
  }

  return isa<ConstantAggregateZero>(V);
}

bool DataFlowSanitizer::hasFastLabelsEnabled() {
  static const bool HasFastLabelsEnabled = ClFast8Labels || ClFast16Labels;
  return HasFastLabelsEnabled;
}

bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
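  // The fast path applies when the total shadow footprint of the load is
  // exactly 4 bytes or a whole number of 8-byte words (e.g. 2-, 4- and
  // 8-byte loads with 16-bit shadows).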
  uint64_t ShadowSize = Size * ShadowWidthBytes;
  return ShadowSize % 8 == 0 || ShadowSize == 4;
}

bool DataFlowSanitizer::shouldTrackOrigins() {
  static const bool ShouldTrackOrigins =
      ClTrackOrigins && getInstrumentedABI() == DataFlowSanitizer::IA_TLS &&
      hasFastLabelsEnabled();
  return ShouldTrackOrigins;
}

bool DataFlowSanitizer::shouldTrackFieldsAndIndices() {
  return getInstrumentedABI() == DataFlowSanitizer::IA_TLS &&
         hasFastLabelsEnabled();
}

Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
  if (!shouldTrackFieldsAndIndices())
    return ZeroPrimitiveShadow;

  if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
    return ZeroPrimitiveShadow;
  Type *ShadowTy = getShadowTy(OrigTy);
  return ConstantAggregateZero::get(ShadowTy);
}

Constant *DataFlowSanitizer::getZeroShadow(Value *V) {
  return getZeroShadow(V->getType());
}

static Value *expandFromPrimitiveShadowRecursive(
    Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
    Value *PrimitiveShadow, IRBuilder<> &IRB) {
  if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))
    return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);

  if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {
    for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {
      Indices.push_back(Idx);
      Shadow = expandFromPrimitiveShadowRecursive(
          Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);
      Indices.pop_back();
    }
    return Shadow;
  }

  if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {
    for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {
      Indices.push_back(Idx);
      Shadow = expandFromPrimitiveShadowRecursive(
          Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);
      Indices.pop_back();
    }
    return Shadow;
  }
  llvm_unreachable("Unexpected shadow type");
}

bool DFSanFunction::shouldInstrumentWithCall() {
  return ClInstrumentWithCallThreshold >= 0 &&
         NumOriginStores >= ClInstrumentWithCallThreshold;
}

Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                                Instruction *Pos) {
  Type *ShadowTy = DFS.getShadowTy(T);

  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return PrimitiveShadow;

  if (DFS.isZeroShadow(PrimitiveShadow))
    return DFS.getZeroShadow(ShadowTy);

  IRBuilder<> IRB(Pos);
  SmallVector<unsigned, 4> Indices;
  Value *Shadow = UndefValue::get(ShadowTy);
  Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,
                                              PrimitiveShadow, IRB);

  // Caches the primitive shadow value that built the shadow value.
  CachedCollapsedShadows[Shadow] = PrimitiveShadow;
  return Shadow;
}

template <class AggregateType>
Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
                                              IRBuilder<> &IRB) {
  if (!AT->getNumElements())
    return DFS.ZeroPrimitiveShadow;

  Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
  Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);

  for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {
    Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
    Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);
    Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
  }
  return Aggregator;
}

Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
                                                IRBuilder<> &IRB) {
  Type *ShadowTy = Shadow->getType();
  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return Shadow;
  if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))
    return collapseAggregateShadow<>(AT, Shadow, IRB);
  if (StructType *ST = dyn_cast<StructType>(ShadowTy))
    return collapseAggregateShadow<>(ST, Shadow, IRB);
  llvm_unreachable("Unexpected shadow type");
}

Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
                                                Instruction *Pos) {
  Type *ShadowTy = Shadow->getType();
  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return Shadow;

  assert(DFS.shouldTrackFieldsAndIndices());

  // Checks if the cached collapsed shadow value dominates Pos.
  Value *&CS = CachedCollapsedShadows[Shadow];
  if (CS && DT.dominates(CS, Pos))
    return CS;

  IRBuilder<> IRB(Pos);
  Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);
  // Caches the converted primitive shadow value.
  CS = PrimitiveShadow;
  return PrimitiveShadow;
}

Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
  if (!shouldTrackFieldsAndIndices())
    return PrimitiveShadowTy;

  if (!OrigTy->isSized())
    return PrimitiveShadowTy;
  if (isa<IntegerType>(OrigTy))
    return PrimitiveShadowTy;
  if (isa<VectorType>(OrigTy))
    return PrimitiveShadowTy;
  if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))
    return ArrayType::get(getShadowTy(AT->getElementType()),
                          AT->getNumElements());
  if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
    SmallVector<Type *, 4> Elements;
    for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I)
      Elements.push_back(getShadowTy(ST->getElementType(I)));
    return StructType::get(*Ctx, Elements);
  }
  return PrimitiveShadowTy;
}

Type *DataFlowSanitizer::getShadowTy(Value *V) {
  return getShadowTy(V->getType());
}

bool DataFlowSanitizer::init(Module &M) {
  Triple TargetTriple(M.getTargetTriple());
  const DataLayout &DL = M.getDataLayout();

  Mod = &M;
  Ctx = &M.getContext();
  Int8Ptr = Type::getInt8PtrTy(*Ctx);
  OriginTy = IntegerType::get(*Ctx, OriginWidthBits);
  OriginPtrTy = PointerType::getUnqual(OriginTy);
  PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
  PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);
  IntptrTy = DL.getIntPtrType(*Ctx);
  ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
  ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidthBytes);
  OriginBase = ConstantInt::get(IntptrTy, 0x200000000000LL);
  ZeroOrigin = ConstantInt::getSigned(OriginTy, 0);

  switch (TargetTriple.getArch()) {
  case Triple::x86_64:
    ShadowPtrMask = ClFast8Labels
                        ? ConstantInt::getSigned(IntptrTy, ~0x600000000000LL)
                        : ConstantInt::getSigned(IntptrTy, ~0x700000000000LL);
    break;
  case Triple::mips64:
  case Triple::mips64el:
    ShadowPtrMask = ClFast8Labels
                        ? ConstantInt::getSigned(IntptrTy, ~0xE000000000LL)
                        : ConstantInt::getSigned(IntptrTy, ~0xF000000000LL);
    break;
  case Triple::aarch64:
  case Triple::aarch64_be:
    // AArch64 supports multiple VMAs and the shadow mask is set at runtime.
    DFSanRuntimeShadowMask = true;
    break;
  default:
    report_fatal_error("unsupported triple");
  }

  Type *DFSanUnionArgs[2] = {PrimitiveShadowTy, PrimitiveShadowTy};
  DFSanUnionFnTy =
      FunctionType::get(PrimitiveShadowTy, DFSanUnionArgs, /*isVarArg=*/false);
  Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
                                         /*isVarArg=*/false);
  Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy};
  DFSanLoadLabelAndOriginFnTy =
      FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,
                        /*isVarArg=*/false);
  DFSanUnimplementedFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
  Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
                                Type::getInt8PtrTy(*Ctx), IntptrTy};
  DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
                                        DFSanSetLabelArgs, /*isVarArg=*/false);
  DFSanNonzeroLabelFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
  DFSanVarargWrapperFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
  DFSanCmpCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                        /*isVarArg=*/false);
  DFSanChainOriginFnTy =
      FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
  Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
  DFSanChainOriginIfTaintedFnTy = FunctionType::get(
      OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
  Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
                                        Int8Ptr, IntptrTy, OriginTy};
  DFSanMaybeStoreOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false);
  Type *DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
  DFSanMemOriginTransferFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false);
  Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};
  DFSanLoadStoreCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
                        /*isVarArg=*/false);
  Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanMemTransferCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
                        /*isVarArg=*/false);

  ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
  OriginStoreWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
  return true;
}

bool DataFlowSanitizer::isInstrumented(const Function *F) {
  return !ABIList.isIn(*F, "uninstrumented");
}

bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
  return !ABIList.isIn(*GA, "uninstrumented");
}

DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
  return ClArgsABI ? IA_Args : IA_TLS;
}

DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
  if (ABIList.isIn(*F, "functional"))
    return WK_Functional;
  if (ABIList.isIn(*F, "discard"))
    return WK_Discard;
  if (ABIList.isIn(*F, "custom"))
    return WK_Custom;

  return WK_Warning;
}

void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) {
  std::string GVName = std::string(GV->getName()), Prefix = "dfs$";
  GV->setName(Prefix + GVName);

  // Try to change the name of the function in module inline asm.  We only do
  // this for specific asm directives, currently only ".symver", to try to avoid
  // corrupting asm which happens to contain the symbol name as a substring.
  // Note that the substitution for .symver assumes that the versioned symbol
  // also has an instrumented name.
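  // For example, ".symver foo,foo@@VERS_1" becomes
  // ".symver dfs$foo,dfs$foo@@VERS_1" (an illustrative version string).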
  std::string Asm = GV->getParent()->getModuleInlineAsm();
  std::string SearchStr = ".symver " + GVName + ",";
  size_t Pos = Asm.find(SearchStr);
  if (Pos != std::string::npos) {
    Asm.replace(Pos, SearchStr.size(),
                ".symver " + Prefix + GVName + "," + Prefix);
    GV->getParent()->setModuleInlineAsm(Asm);
  }
}

Function *
DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
                                        GlobalValue::LinkageTypes NewFLink,
                                        FunctionType *NewFT) {
  FunctionType *FT = F->getFunctionType();
  Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
                                    NewFName, F->getParent());
  NewF->copyAttributesFrom(F);
  NewF->removeAttributes(
      AttributeList::ReturnIndex,
      AttributeFuncs::typeIncompatible(NewFT->getReturnType()));

  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
  if (F->isVarArg()) {
    NewF->removeAttributes(AttributeList::FunctionIndex,
                           AttrBuilder().addAttribute("split-stack"));
    CallInst::Create(DFSanVarargWrapperFn,
                     IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
                     BB);
    new UnreachableInst(*Ctx, BB);
  } else {
    auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin());
    std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams());

    CallInst *CI = CallInst::Create(F, Args, "", BB);
    if (FT->getReturnType()->isVoidTy())
      ReturnInst::Create(*Ctx, BB);
    else
      ReturnInst::Create(*Ctx, CI, BB);
  }

  return NewF;
}

Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
                                                          StringRef FName) {
  FunctionType *FTT = getTrampolineFunctionType(FT);
  FunctionCallee C = Mod->getOrInsertFunction(FName, FTT);
  Function *F = dyn_cast<Function>(C.getCallee());
  if (F && F->isDeclaration()) {
    F->setLinkage(GlobalValue::LinkOnceODRLinkage);
    BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
    std::vector<Value *> Args;
    Function::arg_iterator AI = F->arg_begin() + 1;
    for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
      Args.push_back(&*AI);
    CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB);
    Type *RetType = FT->getReturnType();
    ReturnInst *RI = RetType->isVoidTy() ? ReturnInst::Create(*Ctx, BB)
                                         : ReturnInst::Create(*Ctx, CI, BB);

    // F is called by a wrapped custom function with primitive shadows. So
    // its arguments and return value need conversion.
    DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
    Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI;
    ++ValAI;
    for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) {
      Value *Shadow =
          DFSF.expandFromPrimitiveShadow(ValAI->getType(), &*ShadowAI, CI);
      DFSF.ValShadowMap[&*ValAI] = Shadow;
    }
    Function::arg_iterator RetShadowAI = ShadowAI;
    const bool ShouldTrackOrigins = shouldTrackOrigins();
    if (ShouldTrackOrigins) {
      ValAI = F->arg_begin();
      ++ValAI;
      Function::arg_iterator OriginAI = ShadowAI;
      if (!RetType->isVoidTy())
        ++OriginAI;
      for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++OriginAI, --N) {
        DFSF.ValOriginMap[&*ValAI] = &*OriginAI;
      }
    }
    DFSanVisitor(DFSF).visitCallInst(*CI);
    if (!RetType->isVoidTy()) {
      Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(
          DFSF.getShadow(RI->getReturnValue()), RI);
      new StoreInst(PrimitiveShadow, &*RetShadowAI, RI);
      if (ShouldTrackOrigins) {
        Value *Origin = DFSF.getOrigin(RI->getReturnValue());
        new StoreInst(Origin, &*std::prev(F->arg_end()), RI);
      }
    }
  }

  return cast<Constant>(C.getCallee());
}

// Initializes DataFlowSanitizer runtime functions and declares them in the
// module.
void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
  {
    AttributeList AL;
    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
                         Attribute::NoUnwind);
    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
                         Attribute::ReadNone);
    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
                         Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
    DFSanUnionFn =
        Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
                         Attribute::NoUnwind);
    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
                         Attribute::ReadNone);
    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
                         Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
    DFSanCheckedUnionFn =
        Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
                         Attribute::NoUnwind);
    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
                         Attribute::ReadOnly);
    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
                         Attribute::ZExt);
    DFSanUnionLoadFn =
        Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
                         Attribute::NoUnwind);
    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
                         Attribute::ReadOnly);
    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
                         Attribute::ZExt);
    DFSanUnionLoadFastLabelsFn = Mod->getOrInsertFunction(
        "__dfsan_union_load_fast16labels", DFSanUnionLoadFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
                         Attribute::NoUnwind);
    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
                         Attribute::ReadOnly);
    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
                         Attribute::ZExt);
    DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
        "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
  }
  DFSanUnimplementedFn =
      Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
    DFSanSetLabelFn =
        Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
  }
  DFSanNonzeroLabelFn =
      Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
  DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
                                                  DFSanVarargWrapperFnTy);
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
                         Attribute::ZExt);
    DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
                                                  DFSanChainOriginFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
                         Attribute::ZExt);
    DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
        "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
  }
  DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
      "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);

  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt);
    DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction(
        "__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL);
  }

  DFSanRuntimeFunctions.insert(DFSanUnionFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanCheckedUnionFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanUnionLoadFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanUnionLoadFastLabelsFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanUnimplementedFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanSetLabelFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanVarargWrapperFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanLoadCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanChainOriginFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts());
}

1435 // Initializes the event callback functions and declares them in the module.
1436 void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
1437 DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback",
1438 DFSanLoadStoreCallbackFnTy);
1439 DFSanStoreCallbackFn = Mod->getOrInsertFunction("__dfsan_store_callback",
1440 DFSanLoadStoreCallbackFnTy);
1441 DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
1442 "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
1443 DFSanCmpCallbackFn =
1444 Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy);
1445 }
1446
1447 void DataFlowSanitizer::injectMetadataGlobals(Module &M) {
1448 // These variables can be used:
1449 // - by the runtime (to discover the shadow width that was used during
1450 //   compilation)
1451 // - in testing (to avoid hardcoding the shadow width and type, and instead
1452 //   extract them by pattern matching)
1453 Type *IntTy = Type::getInt32Ty(*Ctx);
1454 (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bits", IntTy, [&] {
1455 return new GlobalVariable(
1456 M, IntTy, /*isConstant=*/true, GlobalValue::WeakODRLinkage,
1457 ConstantInt::get(IntTy, ShadowWidthBits), "__dfsan_shadow_width_bits");
1458 });
1459 (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bytes", IntTy, [&] {
1460 return new GlobalVariable(M, IntTy, /*isConstant=*/true,
1461 GlobalValue::WeakODRLinkage,
1462 ConstantInt::get(IntTy, ShadowWidthBytes),
1463 "__dfsan_shadow_width_bytes");
1464 });
1465 }
1466
1467 bool DataFlowSanitizer::runImpl(Module &M) {
1468 init(M);
1469
1470 if (ABIList.isIn(M, "skip"))
1471 return false;
1472
1473 const unsigned InitialGlobalSize = M.global_size();
1474 const unsigned InitialModuleSize = M.size();
1475
1476 bool Changed = false;
1477
1478 auto GetOrInsertGlobal = [this, &Changed](StringRef Name,
1479 Type *Ty) -> Constant * {
1480 Constant *C = Mod->getOrInsertGlobal(Name, Ty);
1481 if (GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
1482 Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
1483 G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
1484 }
1485 return C;
1486 };
1487
1488 // These globals must be kept in sync with the ones in dfsan.cpp.
1489 ArgTLS =
1490 GetOrInsertGlobal("__dfsan_arg_tls",
1491 ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8));
1492 RetvalTLS = GetOrInsertGlobal(
1493 "__dfsan_retval_tls",
1494 ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8));
1495 ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS);
1496 ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy);
1497 RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy);
1498
1499 (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] {
1500 Changed = true;
1501 return new GlobalVariable(
1502 M, OriginTy, true, GlobalValue::WeakODRLinkage,
1503 ConstantInt::getSigned(OriginTy, shouldTrackOrigins()),
1504 "__dfsan_track_origins");
1505 });
1506
1507 injectMetadataGlobals(M);
1508
1509 ExternalShadowMask =
1510 Mod->getOrInsertGlobal(DFSanExternShadowPtrMask, IntptrTy);
1511
1512 initializeCallbackFunctions(M);
1513 initializeRuntimeFunctions(M);
1514
1515 std::vector<Function *> FnsToInstrument;
1516 SmallPtrSet<Function *, 2> FnsWithNativeABI;
1517 for (Function &F : M)
1518 if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F))
1519 FnsToInstrument.push_back(&F);
1520
1521 // Give function aliases prefixes when necessary, and build wrappers where the
1522 // instrumentedness is inconsistent.
1523 for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end();
1524 AI != AE;) {
1525 GlobalAlias *GA = &*AI;
1526 ++AI;
1527 // Don't stop on weak. We assume people aren't playing games with the
1528 // instrumentedness of overridden weak aliases.
1529 auto *F = dyn_cast<Function>(GA->getBaseObject());
1530 if (!F)
1531 continue;
1532
1533 bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
1534 if (GAInst && FInst) {
1535 addGlobalNamePrefix(GA);
1536 } else if (GAInst != FInst) {
1537 // Non-instrumented alias of an instrumented function, or vice versa.
1538 // Replace the alias with a native-ABI wrapper of the aliasee. The pass
1539 // below will take care of instrumenting it.
1540 Function *NewF =
1541 buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
1542 GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType()));
1543 NewF->takeName(GA);
1544 GA->eraseFromParent();
1545 FnsToInstrument.push_back(NewF);
1546 }
1547 }
1548
1549 ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly)
1550 .addAttribute(Attribute::ReadNone);
1551
1552 // First, change the ABI of every function in the module. ABI-listed
1553 // functions keep their original ABI and get a wrapper function.
1554 for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(),
1555 FE = FnsToInstrument.end();
1556 FI != FE; ++FI) {
1557 Function &F = **FI;
1558 FunctionType *FT = F.getFunctionType();
1559
1560 bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
1561 FT->getReturnType()->isVoidTy());
1562
1563 if (isInstrumented(&F)) {
1564 // Instrumented functions get a 'dfs$' prefix. This allows us to more
1565 // easily identify cases of mismatching ABIs.
1566 if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
1567 FunctionType *NewFT = getArgsFunctionType(FT);
1568 Function *NewF = Function::Create(NewFT, F.getLinkage(),
1569 F.getAddressSpace(), "", &M);
1570 NewF->copyAttributesFrom(&F);
1571 NewF->removeAttributes(
1572 AttributeList::ReturnIndex,
1573 AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
1574 for (Function::arg_iterator FArg = F.arg_begin(),
1575 NewFArg = NewF->arg_begin(),
1576 FArgEnd = F.arg_end();
1577 FArg != FArgEnd; ++FArg, ++NewFArg) {
1578 FArg->replaceAllUsesWith(&*NewFArg);
1579 }
1580 NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
1581
1582 for (Function::user_iterator UI = F.user_begin(), UE = F.user_end();
1583 UI != UE;) {
1584 BlockAddress *BA = dyn_cast<BlockAddress>(*UI);
1585 ++UI;
1586 if (BA) {
1587 BA->replaceAllUsesWith(
1588 BlockAddress::get(NewF, BA->getBasicBlock()));
1589 delete BA;
1590 }
1591 }
1592 F.replaceAllUsesWith(
1593 ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)));
1594 NewF->takeName(&F);
1595 F.eraseFromParent();
1596 *FI = NewF;
1597 addGlobalNamePrefix(NewF);
1598 } else {
1599 addGlobalNamePrefix(&F);
1600 }
1601 } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
1602 // Build a wrapper function for F. The wrapper simply calls F, and is
1603 // added to FnsToInstrument so that any instrumentation according to its
1604 // WrapperKind is done in the second pass below.
1605 FunctionType *NewFT =
1606 getInstrumentedABI() == IA_Args ? getArgsFunctionType(FT) : FT;
1607
1608 // If the function being wrapped has local linkage, then preserve the
1609 // function's linkage in the wrapper function.
1610 GlobalValue::LinkageTypes WrapperLinkage =
1611 F.hasLocalLinkage() ? F.getLinkage()
1612 : GlobalValue::LinkOnceODRLinkage;
1613
1614 Function *NewF = buildWrapperFunction(
1615 &F,
1616 (shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
1617 std::string(F.getName()),
1618 WrapperLinkage, NewFT);
1619 if (getInstrumentedABI() == IA_TLS)
1620 NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs);
1621
1622 Value *WrappedFnCst =
1623 ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
1624 F.replaceAllUsesWith(WrappedFnCst);
1625
1626 UnwrappedFnMap[WrappedFnCst] = &F;
1627 *FI = NewF;
1628
1629 if (!F.isDeclaration()) {
1630 // This function is probably defining an interposition of an
1631 // uninstrumented function and hence needs to keep the original ABI.
1632 // But any functions it may call need to use the instrumented ABI, so
1633 // we instrument it in a mode which preserves the original ABI.
1634 FnsWithNativeABI.insert(&F);
1635
1636 // This code needs to rebuild the iterators, as they may be invalidated
1637 // by the push_back, taking care that the new range does not include
1638 // any functions added by this code.
1639 size_t N = FI - FnsToInstrument.begin(),
1640 Count = FE - FnsToInstrument.begin();
1641 FnsToInstrument.push_back(&F);
1642 FI = FnsToInstrument.begin() + N;
1643 FE = FnsToInstrument.begin() + Count;
1644 }
1645 // Hopefully, nobody will try to indirectly call a vararg
1646 // function... yet.
1647 } else if (FT->isVarArg()) {
1648 UnwrappedFnMap[&F] = &F;
1649 *FI = nullptr;
1650 }
1651 }
1652
1653 for (Function *F : FnsToInstrument) {
1654 if (!F || F->isDeclaration())
1655 continue;
1656
1657 removeUnreachableBlocks(*F);
1658
1659 DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F));
1660
1661 // DFSanVisitor may create new basic blocks, which confuses df_iterator.
1662 // Build a copy of the list before iterating over it.
1663 SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));
1664
1665 for (BasicBlock *BB : BBList) {
1666 Instruction *Inst = &BB->front();
1667 while (true) {
1668 // DFSanVisitor may split the current basic block, changing the current
1669 // instruction's next pointer and moving the next instruction to the
1670 // tail block from which we should continue.
1671 Instruction *Next = Inst->getNextNode();
1672 // DFSanVisitor may delete Inst, so keep track of whether it was a
1673 // terminator.
1674 bool IsTerminator = Inst->isTerminator();
1675 if (!DFSF.SkipInsts.count(Inst))
1676 DFSanVisitor(DFSF).visit(Inst);
1677 if (IsTerminator)
1678 break;
1679 Inst = Next;
1680 }
1681 }
1682
1683 // We will not necessarily be able to compute the shadow for every phi node
1684 // until we have visited every block. Therefore, the code that handles phi
1685 // nodes adds them to the PHIFixups list so that they can be properly
1686 // handled here.
1687 for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) {
1688 for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N;
1689 ++Val) {
1690 P.ShadowPhi->setIncomingValue(
1691 Val, DFSF.getShadow(P.Phi->getIncomingValue(Val)));
1692 if (P.OriginPhi)
1693 P.OriginPhi->setIncomingValue(
1694 Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val)));
1695 }
1696 }
1697
1698 // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
1699 // places (i.e. instructions in basic blocks we haven't even begun visiting
1700 // yet). To make our life easier, do this work in a pass after the main
1701 // instrumentation.
1702 if (ClDebugNonzeroLabels) {
1703 for (Value *V : DFSF.NonZeroChecks) {
1704 Instruction *Pos;
1705 if (Instruction *I = dyn_cast<Instruction>(V))
1706 Pos = I->getNextNode();
1707 else
1708 Pos = &DFSF.F->getEntryBlock().front();
1709 while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
1710 Pos = Pos->getNextNode();
1711 IRBuilder<> IRB(Pos);
1712 Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
1713 Value *Ne =
1714 IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
1715 BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
1716 Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
1717 IRBuilder<> ThenIRB(BI);
1718 ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
1719 }
1720 }
1721 }
1722
1723 return Changed || !FnsToInstrument.empty() ||
1724 M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
1725 }
1726
1727 Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
1728 Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
1729 if (ArgOffset)
1730 Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));
1731 return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0),
1732 "_dfsarg");
1733 }
1734
1735 Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
1736 return IRB.CreatePointerCast(
1737 DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret");
1738 }
1739
1740 Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; }
1741
1742 Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {
1743 return IRB.CreateConstGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, ArgNo,
1744 "_dfsarg_o");
1745 }
1746
1747 Value *DFSanFunction::getOrigin(Value *V) {
1748 assert(DFS.shouldTrackOrigins());
1749 if (!isa<Argument>(V) && !isa<Instruction>(V))
1750 return DFS.ZeroOrigin;
1751 Value *&Origin = ValOriginMap[V];
1752 if (!Origin) {
1753 if (Argument *A = dyn_cast<Argument>(V)) {
1754 if (IsNativeABI)
1755 return DFS.ZeroOrigin;
1756 switch (IA) {
1757 case DataFlowSanitizer::IA_TLS: {
1758 if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
1759 Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
1760 IRBuilder<> IRB(ArgOriginTLSPos);
1761 Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
1762 Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
1763 } else {
1764 // Overflow
1765 Origin = DFS.ZeroOrigin;
1766 }
1767 break;
1768 }
1769 case DataFlowSanitizer::IA_Args: {
1770 Origin = DFS.ZeroOrigin;
1771 break;
1772 }
1773 }
1774 } else {
1775 Origin = DFS.ZeroOrigin;
1776 }
1777 }
1778 return Origin;
1779 }
1780
1781 void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {
1782 if (!DFS.shouldTrackOrigins())
1783 return;
1784 assert(!ValOriginMap.count(I));
1785 assert(Origin->getType() == DFS.OriginTy);
1786 ValOriginMap[I] = Origin;
1787 }
1788
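// Descriptive note: returns the shadow for argument A when shadows are passed
// via TLS. The loop below sums the aligned shadow sizes of the arguments
// preceding A to find A's byte offset in __dfsan_arg_tls, and falls back to a
// zero shadow whenever the slot would overflow ArgTLSSize.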
1789 Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
1790 unsigned ArgOffset = 0;
1791 const DataLayout &DL = F->getParent()->getDataLayout();
1792 for (auto &FArg : F->args()) {
1793 if (!FArg.getType()->isSized()) {
1794 if (A == &FArg)
1795 break;
1796 continue;
1797 }
1798
1799 unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
1800 if (A != &FArg) {
1801 ArgOffset += alignTo(Size, ShadowTLSAlignment);
1802 if (ArgOffset > ArgTLSSize)
1803 break; // ArgTLS overflows, uses a zero shadow.
1804 continue;
1805 }
1806
1807 if (ArgOffset + Size > ArgTLSSize)
1808 break; // ArgTLS overflows, uses a zero shadow.
1809
1810 Instruction *ArgTLSPos = &*F->getEntryBlock().begin();
1811 IRBuilder<> IRB(ArgTLSPos);
1812 Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);
1813 return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,
1814 ShadowTLSAlignment);
1815 }
1816
1817 return DFS.getZeroShadow(A);
1818 }
1819
1820 Value *DFSanFunction::getShadow(Value *V) {
1821 if (!isa<Argument>(V) && !isa<Instruction>(V))
1822 return DFS.getZeroShadow(V);
1823 Value *&Shadow = ValShadowMap[V];
1824 if (!Shadow) {
1825 if (Argument *A = dyn_cast<Argument>(V)) {
1826 if (IsNativeABI)
1827 return DFS.getZeroShadow(V);
1828 switch (IA) {
1829 case DataFlowSanitizer::IA_TLS: {
1830 Shadow = getShadowForTLSArgument(A);
1831 break;
1832 }
1833 case DataFlowSanitizer::IA_Args: {
1834 unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2;
1835 Function::arg_iterator Arg = F->arg_begin();
1836 std::advance(Arg, ArgIdx);
1837 Shadow = &*Arg;
1838 assert(Shadow->getType() == DFS.PrimitiveShadowTy);
1839 break;
1840 }
1841 }
1842 NonZeroChecks.push_back(Shadow);
1843 } else {
1844 Shadow = DFS.getZeroShadow(V);
1845 }
1846 }
1847 return Shadow;
1848 }
1849
1850 void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
1851 assert(!ValShadowMap.count(I));
1852 assert(DFS.shouldTrackFieldsAndIndices() ||
1853 Shadow->getType() == DFS.PrimitiveShadowTy);
1854 ValShadowMap[I] = Shadow;
1855 }
1856
1857 Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {
1858 // Returns Addr & shadow_mask
1859 assert(Addr != RetvalTLS && "Reinstrumenting?");
1860 Value *ShadowPtrMaskValue;
1861 if (DFSanRuntimeShadowMask)
1862 ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask);
1863 else
1864 ShadowPtrMaskValue = ShadowPtrMask;
1865 return IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy),
1866 IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy));
1867 }
1868
1869 std::pair<Value *, Value *>
1870 DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
1871 Instruction *Pos) {
1872 // Returns ((Addr & shadow_mask) + origin_base) & ~3UL
1873 IRBuilder<> IRB(Pos);
1874 Value *ShadowOffset = getShadowOffset(Addr, IRB);
1875 Value *ShadowPtr = getShadowAddress(Addr, Pos, ShadowOffset);
1876 Value *OriginPtr = nullptr;
1877 if (shouldTrackOrigins()) {
1878 Value *OriginLong = IRB.CreateAdd(ShadowOffset, OriginBase);
1879 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
1880 // When alignment is >= 4, Addr must be aligned to 4, otherwise the access
1881 // is UB, so the mask is unnecessary.
1882 if (Alignment < MinOriginAlignment) {
1883 uint64_t Mask = MinOriginAlignment.value() - 1;
1884 OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask));
1885 }
1886 OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy);
1887 }
1888 return {ShadowPtr, OriginPtr};
1889 }
1890
1891 Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos,
1892 Value *ShadowOffset) {
1893 IRBuilder<> IRB(Pos);
1894
1895 if (!ShadowPtrMul->isOne())
1896 ShadowOffset = IRB.CreateMul(ShadowOffset, ShadowPtrMul);
1897
1898 return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy);
1899 }
1900
1901 Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
1902 // Returns (Addr & shadow_mask) x ShadowWidthBytes
1903 IRBuilder<> IRB(Pos);
1904 Value *ShadowOffset = getShadowOffset(Addr, IRB);
1905 return getShadowAddress(Addr, Pos, ShadowOffset);
1906 }
1907
1908 Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
1909 Instruction *Pos) {
1910 Value *PrimitiveValue = combineShadows(V1, V2, Pos);
1911 return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
1912 }
1913
1914 // Generates IR to compute the union of the two given shadows, inserting it
1915 // before Pos. The combined value has a primitive type.
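// For illustration only (shape, not exact IR; i16 assumes 16-bit labels):
// with fast labels the union is a single `or i16 %pv1, %pv2`; otherwise the
// slow path emits roughly
//   %ne = icmp ne i16 %pv1, %pv2
//   br i1 %ne, label %then, label %tail   ; %then calls @__dfsan_union
//   %tail: %s = phi i16 [ %union, %then ], [ %pv1, %head ]
// and the result is cached per dominating block (see CachedShadows).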
1916 Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
1917 if (DFS.isZeroShadow(V1))
1918 return collapseToPrimitiveShadow(V2, Pos);
1919 if (DFS.isZeroShadow(V2))
1920 return collapseToPrimitiveShadow(V1, Pos);
1921 if (V1 == V2)
1922 return collapseToPrimitiveShadow(V1, Pos);
1923
1924 auto V1Elems = ShadowElements.find(V1);
1925 auto V2Elems = ShadowElements.find(V2);
1926 if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
1927 if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
1928 V2Elems->second.begin(), V2Elems->second.end())) {
1929 return collapseToPrimitiveShadow(V1, Pos);
1930 }
1931 if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
1932 V1Elems->second.begin(), V1Elems->second.end())) {
1933 return collapseToPrimitiveShadow(V2, Pos);
1934 }
1935 } else if (V1Elems != ShadowElements.end()) {
1936 if (V1Elems->second.count(V2))
1937 return collapseToPrimitiveShadow(V1, Pos);
1938 } else if (V2Elems != ShadowElements.end()) {
1939 if (V2Elems->second.count(V1))
1940 return collapseToPrimitiveShadow(V2, Pos);
1941 }
1942
1943 auto Key = std::make_pair(V1, V2);
1944 if (V1 > V2)
1945 std::swap(Key.first, Key.second);
1946 CachedShadow &CCS = CachedShadows[Key];
1947 if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
1948 return CCS.Shadow;
1949
1950 // Converts the input shadows to shadows with primitive types.
1951 Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
1952 Value *PV2 = collapseToPrimitiveShadow(V2, Pos);
1953
1954 IRBuilder<> IRB(Pos);
1955 if (DFS.hasFastLabelsEnabled()) {
1956 CCS.Block = Pos->getParent();
1957 CCS.Shadow = IRB.CreateOr(PV1, PV2);
1958 } else if (AvoidNewBlocks) {
1959 CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {PV1, PV2});
1960 Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
1961 Call->addParamAttr(0, Attribute::ZExt);
1962 Call->addParamAttr(1, Attribute::ZExt);
1963
1964 CCS.Block = Pos->getParent();
1965 CCS.Shadow = Call;
1966 } else {
1967 BasicBlock *Head = Pos->getParent();
1968 Value *Ne = IRB.CreateICmpNE(PV1, PV2);
1969 BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
1970 Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT));
1971 IRBuilder<> ThenIRB(BI);
1972 CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {PV1, PV2});
1973 Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
1974 Call->addParamAttr(0, Attribute::ZExt);
1975 Call->addParamAttr(1, Attribute::ZExt);
1976
1977 BasicBlock *Tail = BI->getSuccessor(0);
1978 PHINode *Phi =
1979 PHINode::Create(DFS.PrimitiveShadowTy, 2, "", &Tail->front());
1980 Phi->addIncoming(Call, Call->getParent());
1981 Phi->addIncoming(PV1, Head);
1982
1983 CCS.Block = Tail;
1984 CCS.Shadow = Phi;
1985 }
1986
1987 std::set<Value *> UnionElems;
1988 if (V1Elems != ShadowElements.end()) {
1989 UnionElems = V1Elems->second;
1990 } else {
1991 UnionElems.insert(V1);
1992 }
1993 if (V2Elems != ShadowElements.end()) {
1994 UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
1995 } else {
1996 UnionElems.insert(V2);
1997 }
1998 ShadowElements[CCS.Shadow] = std::move(UnionElems);
1999
2000 return CCS.Shadow;
2001 }
2002
2003 // A convenience function which folds the shadows of each of the operands
2004 // of the provided instruction Inst, inserting the IR before Inst. Returns
2005 // the computed union Value.
2006 Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
2007 if (Inst->getNumOperands() == 0)
2008 return DFS.getZeroShadow(Inst);
2009
2010 Value *Shadow = getShadow(Inst->getOperand(0));
2011 for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I)
2012 Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)), Inst);
2013
2014 return expandFromPrimitiveShadow(Inst->getType(), Shadow, Inst);
2015 }
2016
2017 void DFSanVisitor::visitInstOperands(Instruction &I) {
2018 Value *CombinedShadow = DFSF.combineOperandShadows(&I);
2019 DFSF.setShadow(&I, CombinedShadow);
2020 visitInstOperandOrigins(I);
2021 }
2022
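// Descriptive note: combineOrigins picks one origin from parallel
// (shadow, origin) operand pairs. Constant-zero origins are skipped, and each
// later operand whose shadow compares non-zero at run time overrides the
// current choice via a select instruction.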
2023 Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,
2024 const std::vector<Value *> &Origins,
2025 Instruction *Pos, ConstantInt *Zero) {
2026 assert(Shadows.size() == Origins.size());
2027 size_t Size = Origins.size();
2028 if (Size == 0)
2029 return DFS.ZeroOrigin;
2030 Value *Origin = nullptr;
2031 if (!Zero)
2032 Zero = DFS.ZeroPrimitiveShadow;
2033 for (size_t I = 0; I != Size; ++I) {
2034 Value *OpOrigin = Origins[I];
2035 Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin);
2036 if (ConstOpOrigin && ConstOpOrigin->isNullValue())
2037 continue;
2038 if (!Origin) {
2039 Origin = OpOrigin;
2040 continue;
2041 }
2042 Value *OpShadow = Shadows[I];
2043 Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos);
2044 IRBuilder<> IRB(Pos);
2045 Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero);
2046 Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2047 }
2048 return Origin ? Origin : DFS.ZeroOrigin;
2049 }
2050
2051 Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {
2052 size_t Size = Inst->getNumOperands();
2053 std::vector<Value *> Shadows(Size);
2054 std::vector<Value *> Origins(Size);
2055 for (unsigned I = 0; I != Size; ++I) {
2056 Shadows[I] = getShadow(Inst->getOperand(I));
2057 Origins[I] = getOrigin(Inst->getOperand(I));
2058 }
2059 return combineOrigins(Shadows, Origins, Inst);
2060 }
2061
2062 void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {
2063 if (!DFSF.DFS.shouldTrackOrigins())
2064 return;
2065 Value *CombinedOrigin = DFSF.combineOperandOrigins(&I);
2066 DFSF.setOrigin(&I, CombinedOrigin);
2067 }
2068
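// Descriptive note: shadow accesses are ShadowWidthBytes times wider than the
// corresponding application accesses, so e.g. with 2-byte labels an Align(8)
// application access yields an Align(16) shadow access (Align(1) is assumed
// unless -dfsan-preserve-alignment is set).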
2069 Align DFSanFunction::getShadowAlign(Align InstAlignment) {
2070 const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1);
2071 return Align(Alignment.value() * DFS.ShadowWidthBytes);
2072 }
2073
2074 Align DFSanFunction::getOriginAlign(Align InstAlignment) {
2075 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
2076 return Align(std::max(MinOriginAlignment, Alignment));
2077 }
2078
2079 bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
2080 Align InstAlignment) {
2081 // When tracking of load instructions is enabled, we always use
2082 // __dfsan_load_label_and_origin to reduce code size.
2083 if (ClTrackOrigins == 2)
2084 return true;
2085
2086 assert(Size != 0);
2087 // * if Size == 1, it is sufficient to load its origin aligned at 4.
2088 // * if Size == 2, we assume that in most cases Addr % 2 == 0, so it is
2089 //   sufficient to load its origin aligned at 4. If not, an origin may be
2090 //   lost, but that should happen rarely.
2091 // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When
2092 // Size % 4 == 0, it is more efficient to load origins without callbacks.
2093 // * Otherwise we use __dfsan_load_label_and_origin.
2094 // This should ensure that common cases run efficiently.
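// Worked example (illustrative): an 8-byte load with alignment >= 4 takes the
// fast path below, while a 6-byte load, or an 8-byte load with alignment < 4,
// falls back to the __dfsan_load_label_and_origin callback.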
2095 if (Size <= 2)
2096 return false;
2097
2098 const Align Alignment = llvm::assumeAligned(InstAlignment.value());
2099 return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
2100 }
2101
2102 Value *DataFlowSanitizer::loadNextOrigin(Instruction *Pos, Align OriginAlign,
2103 Value **OriginAddr) {
2104 IRBuilder<> IRB(Pos);
2105 *OriginAddr =
2106 IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
2107 return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
2108 }
2109
2110 std::pair<Value *, Value *> DFSanFunction::loadFast16ShadowFast(
2111 Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
2112 Align OriginAlign, Value *FirstOrigin, Instruction *Pos) {
2113 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
2114 const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;
2115
2116 assert(Size >= 4 && "Not large enough load size for fast path!");
2117
2118 // Used for origin tracking.
2119 std::vector<Value *> Shadows;
2120 std::vector<Value *> Origins;
2121
2122 // Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20)
2123 // but this function is only used in a subset of cases that make it possible
2124 // to optimize the instrumentation.
2125 //
2126 // Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow
2127 // per byte) is either:
2128 // - a multiple of 8 (common)
2129 // - equal to 4 (only for load32 in fast-8 mode)
2130 //
2131 // For the second case, we can fit the wide shadow in a 32-bit integer. In all
2132 // other cases, we use a 64-bit integer to hold the wide shadow.
2133 Type *WideShadowTy =
2134 ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);
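// E.g. (illustrative): with 1-byte labels, a 4-byte load has a 4-byte wide
// shadow and uses i32; with 2-byte labels, the same load has an 8-byte wide
// shadow and uses i64.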
2135
2136 IRBuilder<> IRB(Pos);
2137 Value *WideAddr = IRB.CreateBitCast(ShadowAddr, WideShadowTy->getPointerTo());
2138 Value *CombinedWideShadow =
2139 IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
2140
2141 unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
2142 const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;
2143
2144 auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
2145 if (BytesPerWideShadow > 4) {
2146 assert(BytesPerWideShadow == 8);
2147 // The wide shadow relates to two origin pointers: one for the first four
2148 // application bytes, and one for the last four. We use a left shift to
2149 // get just the shadow bytes that correspond to the first origin pointer,
2150 // and then the entire shadow for the second origin pointer (which will be
2151 // chosen by combineOrigins() iff the least-significant half of the wide
2152 // shadow was empty but the other half was not).
2153 Value *WideShadowLo = IRB.CreateShl(
2154 WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));
2155 Shadows.push_back(WideShadow);
2156 Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));
2157
2158 Shadows.push_back(WideShadowLo);
2159 Origins.push_back(Origin);
2160 } else {
2161 Shadows.push_back(WideShadow);
2162 Origins.push_back(Origin);
2163 }
2164 };
2165
2166 if (ShouldTrackOrigins)
2167 AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);
2168
2169 // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly;
2170 // then OR individual shadows within the combined WideShadow by binary ORing.
2171 // This is fewer instructions than ORing shadows individually, since it
2172 // needs logN shift/or instructions (N being the bytes of the combined wide
2173 // shadow).
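// Worked example (illustrative): for an i64 wide shadow holding four 16-bit
// labels, the folding loop below emits two rounds,
//   V |= V >> 32;  V |= V >> 16;
// leaving the OR of all four labels in the low 16 bits before truncation.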
2174 for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
2175 ByteOfs += BytesPerWideShadow) {
2176 WideAddr = IRB.CreateGEP(WideShadowTy, WideAddr,
2177 ConstantInt::get(DFS.IntptrTy, 1));
2178 Value *NextWideShadow =
2179 IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
2180 CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
2181 if (ShouldTrackOrigins) {
2182 Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
2183 AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
2184 }
2185 }
2186 for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;
2187 Width >>= 1) {
2188 Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
2189 CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
2190 }
2191 return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),
2192 ShouldTrackOrigins
2193 ? combineOrigins(Shadows, Origins, Pos,
2194 ConstantInt::getSigned(IRB.getInt64Ty(), 0))
2195 : DFS.ZeroOrigin};
2196 }
2197
2198 Value *DFSanFunction::loadLegacyShadowFast(Value *ShadowAddr, uint64_t Size,
2199 Align ShadowAlign,
2200 Instruction *Pos) {
2201 // Fast path for the common case where each byte has an identical shadow:
2202 // load the shadow 64 (or 32) bits at a time, falling back to a
2203 // __dfsan_union_load call if the shadows are not all equal.
2204 BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F);
2205 IRBuilder<> FallbackIRB(FallbackBB);
2206 CallInst *FallbackCall = FallbackIRB.CreateCall(
2207 DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
2208 FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
2209
2210 const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;
2211 assert(Size >= 4 && "Not large enough load size for fast path!");
2212
2213 // Same as in loadFast16ShadowFast. In the case of load32, we can fit the
2214 // wide shadow in a 32-bit integer instead.
2215 Type *WideShadowTy =
2216 ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);
2217
2218 // Compare each of the shadows stored in the loaded 64 bits to each other,
2219 // by computing (WideShadow rotl ShadowWidthBits) == WideShadow.
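// E.g. (illustrative): with 16-bit labels in an i64, lanes ABCD become BCDA
// after the rotate, so the comparison holds iff A == B == C == D.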
2220 IRBuilder<> IRB(Pos);
2221 unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
2222 Value *WideAddr = IRB.CreateBitCast(ShadowAddr, WideShadowTy->getPointerTo());
2223 Value *WideShadow =
2224 IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
2225 Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.PrimitiveShadowTy);
2226 Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidthBits);
2227 Value *ShrShadow =
2228 IRB.CreateLShr(WideShadow, WideShadowBitWidth - DFS.ShadowWidthBits);
2229 Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow);
2230 Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow);
2231
2232 BasicBlock *Head = Pos->getParent();
2233 BasicBlock *Tail = Head->splitBasicBlock(Pos->getIterator());
2234
2235 if (DomTreeNode *OldNode = DT.getNode(Head)) {
2236 std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
2237
2238 DomTreeNode *NewNode = DT.addNewBlock(Tail, Head);
2239 for (auto *Child : Children)
2240 DT.changeImmediateDominator(Child, NewNode);
2241 }
2242
2243 // In the following code LastBr will refer to the previous basic block's
2244 // conditional branch instruction, whose true successor is fixed up to point
2245 // to the next block during the loop below or to the tail after the final
2246 // iteration.
2247 BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq);
2248 ReplaceInstWithInst(Head->getTerminator(), LastBr);
2249 DT.addNewBlock(FallbackBB, Head);
2250
2251 const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;
2252
2253 for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
2254 ByteOfs += BytesPerWideShadow) {
2255 BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
2256 DT.addNewBlock(NextBB, LastBr->getParent());
2257 IRBuilder<> NextIRB(NextBB);
2258 WideAddr = NextIRB.CreateGEP(WideShadowTy, WideAddr,
2259 ConstantInt::get(DFS.IntptrTy, 1));
2260 Value *NextWideShadow =
2261 NextIRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
2262 ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
2263 LastBr->setSuccessor(0, NextBB);
2264 LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB);
2265 }
2266
2267 LastBr->setSuccessor(0, Tail);
2268 FallbackIRB.CreateBr(Tail);
2269 PHINode *Shadow =
2270 PHINode::Create(DFS.PrimitiveShadowTy, 2, "", &Tail->front());
2271 Shadow->addIncoming(FallbackCall, FallbackBB);
2272 Shadow->addIncoming(TruncShadow, LastBr->getParent());
2273 return Shadow;
2274 }
2275
2276 std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
2277 Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos) {
2278 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
2279
2280 // Non-escaped loads.
2281 if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
2282 const auto SI = AllocaShadowMap.find(AI);
2283 if (SI != AllocaShadowMap.end()) {
2284 IRBuilder<> IRB(Pos);
2285 Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);
2286 const auto OI = AllocaOriginMap.find(AI);
2287 assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());
2288 return {ShadowLI, ShouldTrackOrigins
2289 ? IRB.CreateLoad(DFS.OriginTy, OI->second)
2290 : nullptr};
2291 }
2292 }
2293
2294 // Load from constant addresses.
2295 SmallVector<const Value *, 2> Objs;
2296 getUnderlyingObjects(Addr, Objs);
2297 bool AllConstants = true;
2298 for (const Value *Obj : Objs) {
2299 if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
2300 continue;
2301 if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
2302 continue;
2303
2304 AllConstants = false;
2305 break;
2306 }
2307 if (AllConstants)
2308 return {DFS.ZeroPrimitiveShadow,
2309 ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
2310
2311 if (Size == 0)
2312 return {DFS.ZeroPrimitiveShadow,
2313 ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};
2314
2315 // Use a callback to load shadow and origin if this is not an optimizable
2316 // case for origin tracking.
2317 if (ShouldTrackOrigins &&
2318 useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {
2319 IRBuilder<> IRB(Pos);
2320 CallInst *Call =
2321 IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
2322 {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
2323 ConstantInt::get(DFS.IntptrTy, Size)});
2324 Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
2325 return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
2326 DFS.PrimitiveShadowTy),
2327 IRB.CreateTrunc(Call, DFS.OriginTy)};
2328 }
2329
2330 // Other cases that support loading shadows or origins in a fast way.
2331 Value *ShadowAddr, *OriginAddr;
2332 std::tie(ShadowAddr, OriginAddr) =
2333 DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
2334
2335 const Align ShadowAlign = getShadowAlign(InstAlignment);
2336 const Align OriginAlign = getOriginAlign(InstAlignment);
2337 Value *Origin = nullptr;
2338 if (ShouldTrackOrigins) {
2339 IRBuilder<> IRB(Pos);
2340 Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);
2341 }
2342
2343 // When the byte size is small enough, we can load the shadow directly with
2344 // just a few instructions.
2345 switch (Size) {
2346 case 1: {
2347 LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
2348 LI->setAlignment(ShadowAlign);
2349 return {LI, Origin};
2350 }
2351 case 2: {
2352 IRBuilder<> IRB(Pos);
2353 Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
2354 ConstantInt::get(DFS.IntptrTy, 1));
2355 Value *Load =
2356 IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);
2357 Value *Load1 =
2358 IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);
2359 return {combineShadows(Load, Load1, Pos), Origin};
2360 }
2361 }
2362 bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size);
2363 bool HasFastLabelsEnabled = DFS.hasFastLabelsEnabled();
2364
2365 if (HasFastLabelsEnabled && HasSizeForFastPath)
2366 return loadFast16ShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign,
2367 OriginAlign, Origin, Pos);
2368
2369 if (!AvoidNewBlocks && HasSizeForFastPath)
2370 return {loadLegacyShadowFast(ShadowAddr, Size, ShadowAlign, Pos), Origin};
2371
2372 IRBuilder<> IRB(Pos);
2373 FunctionCallee &UnionLoadFn = HasFastLabelsEnabled
2374 ? DFS.DFSanUnionLoadFastLabelsFn
2375 : DFS.DFSanUnionLoadFn;
2376 CallInst *FallbackCall = IRB.CreateCall(
2377 UnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
2378 FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
2379 return {FallbackCall, Origin};
2380 }
2381
2382 std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
2383 uint64_t Size,
2384 Align InstAlignment,
2385 Instruction *Pos) {
2386 Value *PrimitiveShadow, *Origin;
2387 std::tie(PrimitiveShadow, Origin) =
2388 loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);
2389 if (DFS.shouldTrackOrigins()) {
2390 if (ClTrackOrigins == 2) {
2391 IRBuilder<> IRB(Pos);
2392 auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);
2393 if (!ConstantShadow || !ConstantShadow->isZeroValue())
2394 Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);
2395 }
2396 }
2397 return {PrimitiveShadow, Origin};
2398 }
2399
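// Descriptive note: strengthens an atomic load's ordering to at least Acquire
// (e.g. Monotonic becomes Acquire) so that the shadow load inserted after the
// application load cannot observe labels older than the application data.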
2400 static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {
2401 switch (AO) {
2402 case AtomicOrdering::NotAtomic:
2403 return AtomicOrdering::NotAtomic;
2404 case AtomicOrdering::Unordered:
2405 case AtomicOrdering::Monotonic:
2406 case AtomicOrdering::Acquire:
2407 return AtomicOrdering::Acquire;
2408 case AtomicOrdering::Release:
2409 case AtomicOrdering::AcquireRelease:
2410 return AtomicOrdering::AcquireRelease;
2411 case AtomicOrdering::SequentiallyConsistent:
2412 return AtomicOrdering::SequentiallyConsistent;
2413 }
2414 llvm_unreachable("Unknown ordering");
2415 }
2416
2417 void DFSanVisitor::visitLoadInst(LoadInst &LI) {
2418 auto &DL = LI.getModule()->getDataLayout();
2419 uint64_t Size = DL.getTypeStoreSize(LI.getType());
2420 if (Size == 0) {
2421 DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
2422 DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin);
2423 return;
2424 }
2425
2426 // When an application load is atomic, increase the atomic ordering between
2427 // atomic application loads and stores to ensure a happens-before order; load
2428 // shadow data after application data; store zero shadow data before
2429 // application data. This ensures shadow loads return either labels of the
2430 // initial application data or zeros.
2431 if (LI.isAtomic())
2432 LI.setOrdering(addAcquireOrdering(LI.getOrdering()));
2433
2434 Instruction *Pos = LI.isAtomic() ? LI.getNextNode() : &LI;
2435 std::vector<Value *> Shadows;
2436 std::vector<Value *> Origins;
2437 Value *PrimitiveShadow, *Origin;
2438 std::tie(PrimitiveShadow, Origin) =
2439 DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos);
2440 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2441 if (ShouldTrackOrigins) {
2442 Shadows.push_back(PrimitiveShadow);
2443 Origins.push_back(Origin);
2444 }
2445 if (ClCombinePointerLabelsOnLoad) {
2446 Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
2447 PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
2448 if (ShouldTrackOrigins) {
2449 Shadows.push_back(PtrShadow);
2450 Origins.push_back(DFSF.getOrigin(LI.getPointerOperand()));
2451 }
2452 }
2453 if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
2454 DFSF.NonZeroChecks.push_back(PrimitiveShadow);
2455
2456 Value *Shadow =
2457 DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos);
2458 DFSF.setShadow(&LI, Shadow);
2459
2460 if (ShouldTrackOrigins) {
2461 DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos));
2462 }
2463
2464 if (ClEventCallbacks) {
2465 IRBuilder<> IRB(Pos);
2466 Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr);
2467 IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8});
2468 }
2469 }
2470
2471 Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
2472 IRBuilder<> &IRB) {
2473 assert(DFS.shouldTrackOrigins());
2474 return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});
2475 }
2476
2477 Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
2478 if (!DFS.shouldTrackOrigins())
2479 return V;
2480 return IRB.CreateCall(DFS.DFSanChainOriginFn, V);
2481 }
2482
2483 Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {
2484 const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
2485 const DataLayout &DL = F->getParent()->getDataLayout();
2486 unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
2487 if (IntptrSize == OriginSize)
2488 return Origin;
2489 assert(IntptrSize == OriginSize * 2);
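  // Illustrative: duplicate the 32-bit origin into both halves of the wider
  // integer, i.e. O -> (O << 32) | O (assuming 4-byte origins and 8-byte
  // pointers), so a single intptr-wide store can paint two origin slots.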
2490 Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);
2491 return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));
2492 }
2493
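// Descriptive note: paintOrigin writes Origin to every 4-byte origin slot
// covering [StoreOriginAddr, StoreOriginAddr + StoreOriginSize). When the
// store is sufficiently aligned, it first paints with intptr-wide stores of
// the duplicated origin, then finishes any tail with origin-sized stores.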
2494 void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
2495 Value *StoreOriginAddr,
2496 uint64_t StoreOriginSize, Align Alignment) {
2497 const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
2498 const DataLayout &DL = F->getParent()->getDataLayout();
2499 const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);
2500 unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
2501 assert(IntptrAlignment >= MinOriginAlignment);
2502 assert(IntptrSize >= OriginSize);
2503
2504 unsigned Ofs = 0;
2505 Align CurrentAlignment = Alignment;
2506 if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {
2507 Value *IntptrOrigin = originToIntptr(IRB, Origin);
2508 Value *IntptrStoreOriginPtr = IRB.CreatePointerCast(
2509 StoreOriginAddr, PointerType::get(DFS.IntptrTy, 0));
2510 for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {
2511 Value *Ptr =
2512 I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)
2513 : IntptrStoreOriginPtr;
2514 IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
2515 Ofs += IntptrSize / OriginSize;
2516 CurrentAlignment = IntptrAlignment;
2517 }
2518 }
2519
2520 for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;
2521 ++I) {
2522 Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)
2523 : StoreOriginAddr;
2524 IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
2525 CurrentAlignment = MinOriginAlignment;
2526 }
2527 }
2528
2529 Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
2530 const Twine &Name) {
2531 Type *VTy = V->getType();
2532 assert(VTy->isIntegerTy());
2533 if (VTy->getIntegerBitWidth() == 1)
2534 // Just converting a bool to a bool, so do nothing.
2535 return V;
2536 return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);
2537 }
2538
2539 void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size,
2540 Value *Shadow, Value *Origin,
2541 Value *StoreOriginAddr, Align InstAlignment) {
2542 // Do not write origins for zero shadows because we do not trace origins for
2543 // untainted sinks.
2544 const Align OriginAlignment = getOriginAlign(InstAlignment);
2545 Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);
2546 IRBuilder<> IRB(Pos);
2547 if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {
2548 if (!ConstantShadow->isZeroValue())
2549 paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,
2550 OriginAlignment);
2551 return;
2552 }
2553
2554 if (shouldInstrumentWithCall()) {
2555 IRB.CreateCall(DFS.DFSanMaybeStoreOriginFn,
2556 {CollapsedShadow,
2557 IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
2558 ConstantInt::get(DFS.IntptrTy, Size), Origin});
2559 } else {
2560 Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
2561 Instruction *CheckTerm = SplitBlockAndInsertIfThen(
2562 Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DT);
2563 IRBuilder<> IRBNew(CheckTerm);
2564 paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
2565 OriginAlignment);
2566 ++NumOriginStores;
2567 }
2568 }
2569
2570 void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
2571 Align ShadowAlign,
2572 Instruction *Pos) {
2573 IRBuilder<> IRB(Pos);
2574 IntegerType *ShadowTy =
2575 IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
2576 Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
2577 Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
2578 Value *ExtShadowAddr =
2579 IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
2580 IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
2581 // Do not write origins for 0 shadows because we do not trace origins for
2582 // untainted sinks.
2583 }
2584
2585 void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
2586 Align InstAlignment,
2587 Value *PrimitiveShadow,
2588 Value *Origin,
2589 Instruction *Pos) {
2590 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;
2591
2592 if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
2593 const auto SI = AllocaShadowMap.find(AI);
2594 if (SI != AllocaShadowMap.end()) {
2595 IRBuilder<> IRB(Pos);
2596 IRB.CreateStore(PrimitiveShadow, SI->second);
2597
2598 // Do not write origins for 0 shadows because we do not trace origins for
2599 // untainted sinks.
2600 if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {
2601 const auto OI = AllocaOriginMap.find(AI);
2602 assert(OI != AllocaOriginMap.end() && Origin);
2603 IRB.CreateStore(Origin, OI->second);
2604 }
2605 return;
2606 }
2607 }
2608
2609 const Align ShadowAlign = getShadowAlign(InstAlignment);
2610 if (DFS.isZeroShadow(PrimitiveShadow)) {
2611 storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);
2612 return;
2613 }
2614
2615 IRBuilder<> IRB(Pos);
2616 Value *ShadowAddr, *OriginAddr;
2617 std::tie(ShadowAddr, OriginAddr) =
2618 DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
2619
2620 const unsigned ShadowVecSize = 8;
2621 assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 &&
2622 "Shadow vector is too large!");
2623
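  // Illustrative: fill the shadow with <8 x shadow> vector stores while at
  // least 8 shadow slots remain, then store the remaining slots one at a
  // time; e.g. Size == 19 becomes two vector stores plus three scalar stores.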
2624 uint64_t Offset = 0;
2625 uint64_t LeftSize = Size;
2626 if (LeftSize >= ShadowVecSize) {
2627 auto *ShadowVecTy =
2628 FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
2629 Value *ShadowVec = UndefValue::get(ShadowVecTy);
2630 for (unsigned I = 0; I != ShadowVecSize; ++I) {
2631 ShadowVec = IRB.CreateInsertElement(
2632 ShadowVec, PrimitiveShadow,
2633 ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I));
2634 }
2635 Value *ShadowVecAddr =
2636 IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
2637 do {
2638 Value *CurShadowVecAddr =
2639 IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
2640 IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
2641 LeftSize -= ShadowVecSize;
2642 ++Offset;
2643 } while (LeftSize >= ShadowVecSize);
2644 Offset *= ShadowVecSize;
2645 }
2646 while (LeftSize > 0) {
2647 Value *CurShadowAddr =
2648 IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
2649 IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
2650 --LeftSize;
2651 ++Offset;
2652 }
2653
2654 if (ShouldTrackOrigins) {
2655 storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr,
2656 InstAlignment);
2657 }
2658 }
2659
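// Descriptive note: strengthens an atomic store's ordering to at least
// Release (e.g. Monotonic becomes Release) so that the zero-shadow store
// inserted before the application store is visible to acquire-ordered shadow
// loads.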
2660 static AtomicOrdering addReleaseOrdering(AtomicOrdering AO) {
2661 switch (AO) {
2662 case AtomicOrdering::NotAtomic:
2663 return AtomicOrdering::NotAtomic;
2664 case AtomicOrdering::Unordered:
2665 case AtomicOrdering::Monotonic:
2666 case AtomicOrdering::Release:
2667 return AtomicOrdering::Release;
2668 case AtomicOrdering::Acquire:
2669 case AtomicOrdering::AcquireRelease:
2670 return AtomicOrdering::AcquireRelease;
2671 case AtomicOrdering::SequentiallyConsistent:
2672 return AtomicOrdering::SequentiallyConsistent;
2673 }
2674 llvm_unreachable("Unknown ordering");
2675 }
2676
2677 void DFSanVisitor::visitStoreInst(StoreInst &SI) {
2678 auto &DL = SI.getModule()->getDataLayout();
2679 Value *Val = SI.getValueOperand();
2680 uint64_t Size = DL.getTypeStoreSize(Val->getType());
2681 if (Size == 0)
2682 return;
2683
2684 // When an application store is atomic, increase the atomic ordering between
2685 // atomic application loads and stores to ensure a happens-before order; load
2686 // shadow data after application data; store zero shadow data before
2687 // application data. This ensures shadow loads return either labels of the
2688 // initial application data or zeros.
2689 if (SI.isAtomic())
2690 SI.setOrdering(addReleaseOrdering(SI.getOrdering()));
2691
2692 const bool ShouldTrackOrigins =
2693 DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();
2694 std::vector<Value *> Shadows;
2695 std::vector<Value *> Origins;
2696
2697 Value *Shadow =
2698 SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);
2699
2700 if (ShouldTrackOrigins) {
2701 Shadows.push_back(Shadow);
2702 Origins.push_back(DFSF.getOrigin(Val));
2703 }
2704
2705 Value *PrimitiveShadow;
2706 if (ClCombinePointerLabelsOnStore) {
2707 Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
2708 if (ShouldTrackOrigins) {
2709 Shadows.push_back(PtrShadow);
2710 Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));
2711 }
2712 PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
2713 } else {
2714 PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI);
2715 }
2716 Value *Origin = nullptr;
2717 if (ShouldTrackOrigins)
2718 Origin = DFSF.combineOrigins(Shadows, Origins, &SI);
2719 DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),
2720 PrimitiveShadow, Origin, &SI);
2721 if (ClEventCallbacks) {
2722 IRBuilder<> IRB(&SI);
2723 Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr);
2724 IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr8});
2725 }
2726 }
2727
2728 void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {
2729 assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
2730
2731 Value *Val = I.getOperand(1);
2732 const auto &DL = I.getModule()->getDataLayout();
2733 uint64_t Size = DL.getTypeStoreSize(Val->getType());
2734 if (Size == 0)
2735 return;
2736
2737 // Conservatively store a zero shadow at the stored address and give the
2738 // result a zero shadow, to prevent shadow data races.
2739 IRBuilder<> IRB(&I);
2740 Value *Addr = I.getOperand(0);
2741 const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment);
2742 DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, &I);
2743 DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I));
2744 DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2745 }
2746
2747 void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {
2748 visitCASOrRMW(I.getAlign(), I);
2749 // TODO: The ordering change follows MSan. It is possible not to change
2750 // ordering because we always set and use 0 shadows.
2751 I.setOrdering(addReleaseOrdering(I.getOrdering()));
2752 }
2753
visitAtomicCmpXchgInst(AtomicCmpXchgInst & I)2754 void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
2755 visitCASOrRMW(I.getAlign(), I);
2756 // TODO: The ordering change follows MSan. It is possible not to change
2757 // ordering because we always set and use 0 shadows.
2758 I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
2759 }
2760
visitUnaryOperator(UnaryOperator & UO)2761 void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
2762 visitInstOperands(UO);
2763 }
2764
visitBinaryOperator(BinaryOperator & BO)2765 void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
2766 visitInstOperands(BO);
2767 }
2768
visitCastInst(CastInst & CI)2769 void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); }
2770
visitCmpInst(CmpInst & CI)2771 void DFSanVisitor::visitCmpInst(CmpInst &CI) {
2772 visitInstOperands(CI);
2773 if (ClEventCallbacks) {
2774 IRBuilder<> IRB(&CI);
2775 Value *CombinedShadow = DFSF.getShadow(&CI);
2776 IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
2777 }
2778 }
2779
visitGetElementPtrInst(GetElementPtrInst & GEPI)2780 void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
2781 visitInstOperands(GEPI);
2782 }
2783
visitExtractElementInst(ExtractElementInst & I)2784 void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
2785 visitInstOperands(I);
2786 }
2787
visitInsertElementInst(InsertElementInst & I)2788 void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
2789 visitInstOperands(I);
2790 }
2791
visitShuffleVectorInst(ShuffleVectorInst & I)2792 void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
2793 visitInstOperands(I);
2794 }
2795
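// With field/index tracking enabled, extractvalue and insertvalue operate on
// the aggregate shadow structurally, using the same indices as the original
// instruction; otherwise they fall back to combining all operand shadows.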
void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
  if (!DFSF.DFS.shouldTrackFieldsAndIndices()) {
    visitInstOperands(I);
    return;
  }

  IRBuilder<> IRB(&I);
  Value *Agg = I.getAggregateOperand();
  Value *AggShadow = DFSF.getShadow(Agg);
  Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
  DFSF.setShadow(&I, ResShadow);
  visitInstOperandOrigins(I);
}

void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
  if (!DFSF.DFS.shouldTrackFieldsAndIndices()) {
    visitInstOperands(I);
    return;
  }

  IRBuilder<> IRB(&I);
  Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
  Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
  Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
  DFSF.setShadow(&I, Res);
  visitInstOperandOrigins(I);
}

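// For allocas whose address is only ever used by direct loads and stores, keep
// the shadow (and origin) in a dedicated stack slot instead of shadow memory;
// the alloca's address value itself always carries a zero shadow.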
void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
  bool AllLoadsStores = true;
  for (User *U : I.users()) {
    if (isa<LoadInst>(U))
      continue;

    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getPointerOperand() == &I)
        continue;
    }

    AllLoadsStores = false;
    break;
  }
  if (AllLoadsStores) {
    IRBuilder<> IRB(&I);
    DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
    if (DFSF.DFS.shouldTrackOrigins()) {
      DFSF.AllocaOriginMap[&I] =
          IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");
    }
  }
  DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
  DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
}

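// The shadow of a select is a select over the operand shadows; under
// ClTrackSelectControlFlow the condition's shadow is folded in as well.
// Vector conditions conservatively union both sides' shadows.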
void DFSanVisitor::visitSelectInst(SelectInst &I) {
  Value *CondShadow = DFSF.getShadow(I.getCondition());
  Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
  Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
  Value *ShadowSel = nullptr;
  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;
  Value *TrueOrigin =
      ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr;
  Value *FalseOrigin =
      ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;

  if (isa<VectorType>(I.getCondition()->getType())) {
    ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
                                               FalseShadow, &I);
    if (ShouldTrackOrigins) {
      Shadows.push_back(TrueShadow);
      Shadows.push_back(FalseShadow);
      Origins.push_back(TrueOrigin);
      Origins.push_back(FalseOrigin);
    }
  } else {
    if (TrueShadow == FalseShadow) {
      ShadowSel = TrueShadow;
      if (ShouldTrackOrigins) {
        Shadows.push_back(TrueShadow);
        Origins.push_back(TrueOrigin);
      }
    } else {
      ShadowSel =
          SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
      if (ShouldTrackOrigins) {
        Shadows.push_back(ShadowSel);
        Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin,
                                             FalseOrigin, "", &I));
      }
    }
  }
  DFSF.setShadow(&I, ClTrackSelectControlFlow
                         ? DFSF.combineShadowsThenConvert(
                               I.getType(), CondShadow, ShadowSel, &I)
                         : ShadowSel);
  if (ShouldTrackOrigins) {
    if (ClTrackSelectControlFlow) {
      Shadows.push_back(CondShadow);
      Origins.push_back(DFSF.getOrigin(I.getCondition()));
    }
    DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, &I));
  }
}

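// memset propagates the label (and origin) of the fill byte to every byte of
// the destination range via the runtime set-label function (DFSanSetLabelFn).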
void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
  IRBuilder<> IRB(&I);
  Value *ValShadow = DFSF.getShadow(I.getValue());
  Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()
                         ? DFSF.getOrigin(I.getValue())
                         : DFSF.DFS.ZeroOrigin;
  IRB.CreateCall(
      DFSF.DFS.DFSanSetLabelFn,
      {ValShadow, ValOrigin,
       IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
       IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
}

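// memcpy/memmove also transfer the corresponding shadow (and origin) ranges,
// by emitting a second memory transfer over shadow memory with the length
// scaled by ShadowWidthBytes.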
void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
  IRBuilder<> IRB(&I);

  // CopyOrMoveOrigin transfers origins by referring to their shadows. So we
  // need to move origins before moving shadows.
  if (DFSF.DFS.shouldTrackOrigins()) {
    IRB.CreateCall(
        DFSF.DFS.DFSanMemOriginTransferFn,
        {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});
  }

  Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
  Value *LenShadow =
      IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
                                                    DFSF.DFS.ShadowWidthBytes));
  Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
  Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr);
  SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
  auto *MTI = cast<MemTransferInst>(
      IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
  if (ClPreserveAlignment) {
    MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes);
    MTI->setSourceAlignment(I.getSourceAlign() * DFSF.DFS.ShadowWidthBytes);
  } else {
    MTI->setDestAlignment(Align(DFSF.DFS.ShadowWidthBytes));
    MTI->setSourceAlignment(Align(DFSF.DFS.ShadowWidthBytes));
  }
  if (ClEventCallbacks) {
    IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn,
                   {RawDestShadow,
                    IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
  }
}

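// Passes the return value's shadow (and origin) back to the caller, either
// through the TLS slots (IA_TLS) or by returning an aggregate of the value
// and its shadow (IA_Args).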
void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
  if (!DFSF.IsNativeABI && RI.getReturnValue()) {
    switch (DFSF.IA) {
    case DataFlowSanitizer::IA_TLS: {
      Value *S = DFSF.getShadow(RI.getReturnValue());
      IRBuilder<> IRB(&RI);
      Type *RT = DFSF.F->getFunctionType()->getReturnType();
      unsigned Size =
          getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
      if (Size <= RetvalTLSSize) {
        // If the size overflows, store nothing. At the call site, oversized
        // return shadows are set to zero.
        IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB),
                               ShadowTLSAlignment);
      }
      if (DFSF.DFS.shouldTrackOrigins()) {
        Value *O = DFSF.getOrigin(RI.getReturnValue());
        IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
      }
      break;
    }
    case DataFlowSanitizer::IA_Args: {
      IRBuilder<> IRB(&RI);
      Type *RT = DFSF.F->getFunctionType()->getReturnType();
      Value *InsVal =
          IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0);
      Value *InsShadow =
          IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1);
      RI.setOperand(0, InsShadow);
      break;
    }
    }
  }
}

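// Appends the shadow arguments for a call to a custom (__dfsw_/__dfso_)
// wrapper: one collapsed primitive shadow per fixed parameter, a pointer to
// an array of variadic-argument shadows, and, for non-void functions, a
// pointer through which the wrapper writes the return value's shadow.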
void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Adds non-variable argument shadows.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB));

  // Adds variable argument shadows.
  if (FT->isVarArg()) {
    auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
                                     CB.arg_size() - FT->getNumParams());
    auto *LabelVAAlloca =
        new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(),
                       "labelva", &DFSF.F->getEntryBlock().front());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N);
      IRB.CreateStore(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB),
                      LabelVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
  }

  // Adds the return value shadow.
  if (!FT->getReturnType()->isVoidTy()) {
    if (!DFSF.LabelReturnAlloca) {
      DFSF.LabelReturnAlloca = new AllocaInst(
          DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(),
          "labelreturn", &DFSF.F->getEntryBlock().front());
    }
    Args.push_back(DFSF.LabelReturnAlloca);
  }
}

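// Mirrors addShadowArguments for origins: one origin per fixed parameter, a
// pointer to an array of variadic-argument origins, and a return-origin
// out-parameter for non-void functions.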
void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Add non-variable argument origins.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.getOrigin(*I));

  // Add variable argument origins.
  if (FT->isVarArg()) {
    auto *OriginVATy =
        ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams());
    auto *OriginVAAlloca =
        new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(),
                       "originva", &DFSF.F->getEntryBlock().front());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N);
      IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0));
  }

  // Add the return value origin.
  if (!FT->getReturnType()->isVoidTy()) {
    if (!DFSF.OriginReturnAlloca) {
      DFSF.OriginReturnAlloca = new AllocaInst(
          DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(),
          "originreturn", &DFSF.F->getEntryBlock().front());
    }
    Args.push_back(DFSF.OriginReturnAlloca);
  }
}

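// Rewrites a call to a function in the ABI list according to its wrapper
// kind: warn at runtime and zero the result shadow (WK_Warning), drop label
// propagation (WK_Discard), propagate operand labels to the result
// (WK_Functional), or redirect the call to a __dfsw_/__dfso_ custom wrapper
// that receives explicit shadow and origin arguments (WK_Custom). Returns
// false if the call could not be rewritten.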
bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
  IRBuilder<> IRB(&CB);
  switch (DFSF.DFS.getWrapperKind(&F)) {
  case DataFlowSanitizer::WK_Warning:
    CB.setCalledFunction(&F);
    IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
                   IRB.CreateGlobalStringPtr(F.getName()));
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Discard:
    CB.setCalledFunction(&F);
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Functional:
    CB.setCalledFunction(&F);
    visitInstOperands(CB);
    return true;
  case DataFlowSanitizer::WK_Custom:
    // Don't try to handle invokes of custom functions; it's too complicated.
    // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
    // wrapper.
    CallInst *CI = dyn_cast<CallInst>(&CB);
    if (!CI)
      return false;

    const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
    FunctionType *FT = F.getFunctionType();
    TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
    std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_";
    CustomFName += F.getName();
    FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
        CustomFName, CustomFn.TransformedType);
    if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
      CustomFn->copyAttributesFrom(&F);

      // Custom functions returning non-void will write to the return label.
      if (!FT->getReturnType()->isVoidTy()) {
        CustomFn->removeAttributes(AttributeList::FunctionIndex,
                                   DFSF.DFS.ReadOnlyNoneAttrs);
      }
    }

    std::vector<Value *> Args;

    // Adds non-variable arguments.
    auto *I = CB.arg_begin();
    for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {
      Type *T = (*I)->getType();
      FunctionType *ParamFT;
      if (isa<PointerType>(T) &&
          (ParamFT = dyn_cast<FunctionType>(T->getPointerElementType()))) {
        std::string TName = "dfst";
        TName += utostr(FT->getNumParams() - N);
        TName += "$";
        TName += F.getName();
        Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
        Args.push_back(T);
        Args.push_back(
            IRB.CreateBitCast(*I, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
      } else {
        Args.push_back(*I);
      }
    }

    // Adds shadow arguments.
    const unsigned ShadowArgStart = Args.size();
    addShadowArguments(F, CB, Args, IRB);

    // Adds origin arguments.
    const unsigned OriginArgStart = Args.size();
    if (ShouldTrackOrigins)
      addOriginArguments(F, CB, Args, IRB);

    // Adds variable arguments.
    append_range(Args, drop_begin(CB.args(), FT->getNumParams()));

    CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
    CustomCI->setCallingConv(CI->getCallingConv());
    CustomCI->setAttributes(transformFunctionAttributes(
        CustomFn, CI->getContext(), CI->getAttributes()));

    // Update the parameter attributes of the custom call instruction to
    // zero extend the shadow parameters. This is required for targets
    // which consider PrimitiveShadowTy an illegal type.
    for (unsigned N = 0; N < FT->getNumParams(); N++) {
      const unsigned ArgNo = ShadowArgStart + N;
      if (CustomCI->getArgOperand(ArgNo)->getType() ==
          DFSF.DFS.PrimitiveShadowTy)
        CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
      if (ShouldTrackOrigins) {
        const unsigned OriginArgNo = OriginArgStart + N;
        if (CustomCI->getArgOperand(OriginArgNo)->getType() ==
            DFSF.DFS.OriginTy)
          CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt);
      }
    }

    // Loads the return value shadow and origin.
    if (!FT->getReturnType()->isVoidTy()) {
      LoadInst *LabelLoad =
          IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca);
      DFSF.setShadow(CustomCI, DFSF.expandFromPrimitiveShadow(
                                   FT->getReturnType(), LabelLoad, &CB));
      if (ShouldTrackOrigins) {
        LoadInst *OriginLoad =
            IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca);
        DFSF.setOrigin(CustomCI, OriginLoad);
      }
    }

    CI->replaceAllUsesWith(CustomCI);
    CI->eraseFromParent();
    return true;
  }
  return false;
}

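// Instruments a call or invoke: intrinsics and inline asm are treated like
// ordinary operand-combining instructions, ABI-listed callees are handled by
// visitWrappedCallBase, and all other calls pass argument and return shadows
// through TLS (IA_TLS) or through a rewritten argument list (IA_Args).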
void DFSanVisitor::visitCallBase(CallBase &CB) {
  Function *F = CB.getCalledFunction();
  if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
    visitInstOperands(CB);
    return;
  }

  // Calls to this function are synthesized in wrappers, and we shouldn't
  // instrument them.
  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
    return;

  DenseMap<Value *, Function *>::iterator UnwrappedFnIt =
      DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
  if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end())
    if (visitWrappedCallBase(*UnwrappedFnIt->second, CB))
      return;

  IRBuilder<> IRB(&CB);

  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  FunctionType *FT = CB.getFunctionType();
  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
    // Stores argument shadows.
    unsigned ArgOffset = 0;
    const DataLayout &DL = getDataLayout();
    for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
      if (ShouldTrackOrigins) {
        // Ignore overflowed origins
        Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
        if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
            !DFSF.DFS.isZeroShadow(ArgShadow))
          IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
                          DFSF.getArgOriginTLS(I, IRB));
      }

      unsigned Size =
          DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
      // Stop storing if arguments' size overflows. Inside a function, arguments
      // after overflow have zero shadow values.
      if (ArgOffset + Size > ArgTLSSize)
        break;
      IRB.CreateAlignedStore(
          DFSF.getShadow(CB.getArgOperand(I)),
          DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
          ShadowTLSAlignment);
      ArgOffset += alignTo(Size, ShadowTLSAlignment);
    }
  }

  Instruction *Next = nullptr;
  if (!CB.getType()->isVoidTy()) {
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      if (II->getNormalDest()->getSinglePredecessor()) {
        Next = &II->getNormalDest()->front();
      } else {
        BasicBlock *NewBB =
            SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
        Next = &NewBB->front();
      }
    } else {
      assert(CB.getIterator() != CB.getParent()->end());
      Next = CB.getNextNode();
    }

    if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
      // Loads the return value shadow.
      IRBuilder<> NextIRB(Next);
      const DataLayout &DL = getDataLayout();
      unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
      if (Size > RetvalTLSSize) {
        // Set overflowed return shadow to be zero.
        DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
      } else {
        LoadInst *LI = NextIRB.CreateAlignedLoad(
            DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
            ShadowTLSAlignment, "_dfsret");
        DFSF.SkipInsts.insert(LI);
        DFSF.setShadow(&CB, LI);
        DFSF.NonZeroChecks.push_back(LI);
      }

      if (ShouldTrackOrigins) {
        LoadInst *LI = NextIRB.CreateLoad(
            DFSF.DFS.OriginTy, DFSF.getRetvalOriginTLS(), "_dfsret_o");
        DFSF.SkipInsts.insert(LI);
        DFSF.setOrigin(&CB, LI);
      }
    }
  }

  // Do all instrumentation for IA_Args down here to defer tampering with the
  // CFG in a way that SplitEdge may be able to detect.
  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) {
    FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT);
    Value *Func =
        IRB.CreateBitCast(CB.getCalledOperand(), PointerType::getUnqual(NewFT));

    const unsigned NumParams = FT->getNumParams();

    // Copy original arguments.
    auto *ArgIt = CB.arg_begin(), *ArgEnd = CB.arg_end();
    std::vector<Value *> Args(NumParams);
    std::copy_n(ArgIt, NumParams, Args.begin());

    // Add shadow arguments by transforming original arguments.
    std::generate_n(std::back_inserter(Args), NumParams,
                    [&]() { return DFSF.getShadow(*ArgIt++); });

    if (FT->isVarArg()) {
      unsigned VarArgSize = CB.arg_size() - NumParams;
      ArrayType *VarArgArrayTy =
          ArrayType::get(DFSF.DFS.PrimitiveShadowTy, VarArgSize);
      AllocaInst *VarArgShadow =
          new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(),
                         "", &DFSF.F->getEntryBlock().front());
      Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));

      // Copy remaining var args.
      unsigned GepIndex = 0;
      std::for_each(ArgIt, ArgEnd, [&](Value *Arg) {
        IRB.CreateStore(
            DFSF.getShadow(Arg),
            IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, GepIndex++));
        Args.push_back(Arg);
      });
    }

    CallBase *NewCB;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCB = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(),
                               II->getUnwindDest(), Args);
    } else {
      NewCB = IRB.CreateCall(NewFT, Func, Args);
    }
    NewCB->setCallingConv(CB.getCallingConv());
    NewCB->setAttributes(CB.getAttributes().removeAttributes(
        *DFSF.DFS.Ctx, AttributeList::ReturnIndex,
        AttributeFuncs::typeIncompatible(NewCB->getType())));

    if (Next) {
      ExtractValueInst *ExVal = ExtractValueInst::Create(NewCB, 0, "", Next);
      DFSF.SkipInsts.insert(ExVal);
      ExtractValueInst *ExShadow = ExtractValueInst::Create(NewCB, 1, "", Next);
      DFSF.SkipInsts.insert(ExShadow);
      DFSF.setShadow(ExVal, ExShadow);
      DFSF.NonZeroChecks.push_back(ExShadow);

      CB.replaceAllUsesWith(ExVal);
    }

    CB.eraseFromParent();
  }
}

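// Creates matching shadow (and origin) phi nodes. Their incoming values start
// as undef placeholders and are recorded in DFSF.PHIFixups so they can be
// filled in once shadows exist for all instructions.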
void DFSanVisitor::visitPHINode(PHINode &PN) {
  Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
  PHINode *ShadowPN =
      PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "", &PN);

  // Give the shadow phi node valid predecessors to fool SplitEdge into working.
  Value *UndefShadow = UndefValue::get(ShadowTy);
  for (BasicBlock *BB : PN.blocks())
    ShadowPN->addIncoming(UndefShadow, BB);

  DFSF.setShadow(&PN, ShadowPN);

  PHINode *OriginPN = nullptr;
  if (DFSF.DFS.shouldTrackOrigins()) {
    OriginPN =
        PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "", &PN);
    Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy);
    for (BasicBlock *BB : PN.blocks())
      OriginPN->addIncoming(UndefOrigin, BB);
    DFSF.setOrigin(&PN, OriginPN);
  }

  DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN});
}

namespace {
class DataFlowSanitizerLegacyPass : public ModulePass {
private:
  std::vector<std::string> ABIListFiles;

public:
  static char ID;

  DataFlowSanitizerLegacyPass(
      const std::vector<std::string> &ABIListFiles = std::vector<std::string>())
      : ModulePass(ID), ABIListFiles(ABIListFiles) {}

  bool runOnModule(Module &M) override {
    return DataFlowSanitizer(ABIListFiles).runImpl(M);
  }
};
} // namespace

char DataFlowSanitizerLegacyPass::ID;

INITIALIZE_PASS(DataFlowSanitizerLegacyPass, "dfsan",
                "DataFlowSanitizer: dynamic data flow analysis.", false, false)

ModulePass *llvm::createDataFlowSanitizerLegacyPassPass(
    const std::vector<std::string> &ABIListFiles) {
  return new DataFlowSanitizerLegacyPass(ABIListFiles);
}

PreservedAnalyses DataFlowSanitizerPass::run(Module &M,
                                             ModuleAnalysisManager &AM) {
  if (DataFlowSanitizer(ABIListFiles).runImpl(M)) {
    return PreservedAnalyses::none();
  }
  return PreservedAnalyses::all();
}