xref: /llvm-project/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp (revision 171067923744d597648450d3da204dd2f4a3fea0)
1 //===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the instrumentation pass for the numerical sanitizer.
10 // Conceptually the pass injects shadow computations using higher precision
11 // types and inserts consistency checks. For details see the paper
12 // https://arxiv.org/abs/2102.12782.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
17 
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/Analysis/ValueTracking.h"
25 #include "llvm/IR/DataLayout.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include "llvm/IR/Metadata.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/InitializePasses.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/Regex.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Transforms/Instrumentation.h"
42 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
43 #include "llvm/Transforms/Utils/EscapeEnumerator.h"
44 #include "llvm/Transforms/Utils/Local.h"
45 #include "llvm/Transforms/Utils/ModuleUtils.h"
46 
47 #include <cstdint>
48 
49 using namespace llvm;
50 
51 #define DEBUG_TYPE "nsan"
52 
53 STATISTIC(NumInstrumentedFTLoads,
54           "Number of instrumented floating-point loads");
55 
56 STATISTIC(NumInstrumentedFTCalls,
57           "Number of instrumented floating-point calls");
58 STATISTIC(NumInstrumentedFTRets,
59           "Number of instrumented floating-point returns");
60 STATISTIC(NumInstrumentedFTStores,
61           "Number of instrumented floating-point stores");
62 STATISTIC(NumInstrumentedNonFTStores,
63           "Number of instrumented non floating-point stores");
64 STATISTIC(
65     NumInstrumentedNonFTMemcpyStores,
66     "Number of instrumented non floating-point stores with memcpy semantics");
67 STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");
68 
// Using smaller shadow types can help improve speed. For example, `dlq`
// is 3x slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to
// `dqq`.
72 static cl::opt<std::string> ClShadowMapping(
73     "nsan-shadow-type-mapping", cl::init("dqq"),
74     cl::desc("One shadow type id for each of `float`, `double`, `long double`. "
75              "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
76              "ppc_fp128 (extended double) respectively. The default is to "
77              "shadow `float` as `double`, and `double` and `x86_fp80` as "
78              "`fp128`"),
79     cl::Hidden);
80 
81 static cl::opt<bool>
82     ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(true),
83                      cl::desc("Instrument floating-point comparisons"),
84                      cl::Hidden);
85 
86 static cl::opt<std::string> ClCheckFunctionsFilter(
87     "check-functions-filter",
88     cl::desc("Only emit checks for arguments of functions "
89              "whose names match the given regular expression"),
90     cl::value_desc("regex"));
91 
92 static cl::opt<bool> ClTruncateFCmpEq(
93     "nsan-truncate-fcmp-eq", cl::init(true),
94     cl::desc(
95         "This flag controls the behaviour of fcmp equality comparisons."
96         "For equality comparisons such as `x == 0.0f`, we can perform the "
97         "shadow check in the shadow (`x_shadow == 0.0) == (x == 0.0f)`) or app "
98         " domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). This helps "
99         "catch the case when `x_shadow` is accurate enough (and therefore "
100         "close enough to zero) so that `trunc(x_shadow)` is zero even though "
101         "both `x` and `x_shadow` are not"),
102     cl::Hidden);
103 
104 // When there is external, uninstrumented code writing to memory, the shadow
105 // memory can get out of sync with the application memory. Enabling this flag
106 // emits consistency checks for loads to catch this situation.
107 // When everything is instrumented, this is not strictly necessary because any
108 // load should have a corresponding store, but can help debug cases when the
109 // framework did a bad job at tracking shadow memory modifications by failing on
110 // load rather than store.
111 // TODO: provide a way to resume computations from the FT value when the load
112 // is inconsistent. This ensures that further computations are not polluted.
113 static cl::opt<bool> ClCheckLoads("nsan-check-loads",
114                                   cl::desc("Check floating-point load"),
115                                   cl::Hidden);
116 
117 static cl::opt<bool> ClCheckStores("nsan-check-stores", cl::init(true),
118                                    cl::desc("Check floating-point stores"),
119                                    cl::Hidden);
120 
121 static cl::opt<bool> ClCheckRet("nsan-check-ret", cl::init(true),
122                                 cl::desc("Check floating-point return values"),
123                                 cl::Hidden);
124 
125 // LLVM may store constant floats as bitcasted ints.
126 // It's not really necessary to shadow such stores,
127 // if the shadow value is unknown the framework will re-extend it on load
128 // anyway. Moreover, because of size collisions (e.g. bf16 vs f16) it is
129 // impossible to determine the floating-point type based on the size.
130 // However, for debugging purposes it can be useful to model such stores.
131 static cl::opt<bool> ClPropagateNonFTConstStoresAsFT(
132     "nsan-propagate-non-ft-const-stores-as-ft",
133     cl::desc(
134         "Propagate non floating-point const stores as floating point values."
135         "For debugging purposes only"),
136     cl::Hidden);
137 
138 constexpr StringLiteral kNsanModuleCtorName("nsan.module_ctor");
139 constexpr StringLiteral kNsanInitName("__nsan_init");
140 
141 // The following values must be kept in sync with the runtime.
142 constexpr int kShadowScale = 2;
143 constexpr int kMaxVectorWidth = 8;
144 constexpr int kMaxNumArgs = 128;
145 constexpr int kMaxShadowTypeSizeBytes = 16; // fp128
146 
147 namespace {
148 
149 // Defines the characteristics (type id, type, and floating-point semantics)
150 // attached for all possible shadow types.
151 class ShadowTypeConfig {
152 public:
153   static std::unique_ptr<ShadowTypeConfig> fromNsanTypeId(char TypeId);
154 
155   // The LLVM Type corresponding to the shadow type.
156   virtual Type *getType(LLVMContext &Context) const = 0;
157 
158   // The nsan type id of the shadow type (`d`, `l`, `q`, ...).
159   virtual char getNsanTypeId() const = 0;
160 
161   virtual ~ShadowTypeConfig() = default;
162 };
163 
164 template <char NsanTypeId>
165 class ShadowTypeConfigImpl : public ShadowTypeConfig {
166 public:
167   char getNsanTypeId() const override { return NsanTypeId; }
168   static constexpr const char kNsanTypeId = NsanTypeId;
169 };
170 
171 // `double` (`d`) shadow type.
172 class F64ShadowConfig : public ShadowTypeConfigImpl<'d'> {
173   Type *getType(LLVMContext &Context) const override {
174     return Type::getDoubleTy(Context);
175   }
176 };
177 
178 // `x86_fp80` (`l`) shadow type: X86 long double.
179 class F80ShadowConfig : public ShadowTypeConfigImpl<'l'> {
180   Type *getType(LLVMContext &Context) const override {
181     return Type::getX86_FP80Ty(Context);
182   }
183 };
184 
185 // `fp128` (`q`) shadow type.
186 class F128ShadowConfig : public ShadowTypeConfigImpl<'q'> {
187   Type *getType(LLVMContext &Context) const override {
188     return Type::getFP128Ty(Context);
189   }
190 };
191 
192 // `ppc_fp128` (`e`) shadow type: IBM extended double with 106 bits of mantissa.
193 class PPC128ShadowConfig : public ShadowTypeConfigImpl<'e'> {
194   Type *getType(LLVMContext &Context) const override {
195     return Type::getPPC_FP128Ty(Context);
196   }
197 };
198 
199 // Creates a ShadowTypeConfig given its type id.
200 std::unique_ptr<ShadowTypeConfig>
201 ShadowTypeConfig::fromNsanTypeId(const char TypeId) {
202   switch (TypeId) {
203   case F64ShadowConfig::kNsanTypeId:
204     return std::make_unique<F64ShadowConfig>();
205   case F80ShadowConfig::kNsanTypeId:
206     return std::make_unique<F80ShadowConfig>();
207   case F128ShadowConfig::kNsanTypeId:
208     return std::make_unique<F128ShadowConfig>();
209   case PPC128ShadowConfig::kNsanTypeId:
210     return std::make_unique<PPC128ShadowConfig>();
211   }
212   report_fatal_error("nsan: invalid shadow type id '" + Twine(TypeId) + "'");
213 }
214 
// The application floating-point value types that get a shadow. The values are
// used as array indices, which is why this is not an `enum class`.
enum FTValueType {
  kFloat = 0,
  kDouble = 1,
  kLongDouble = 2,
  kNumValueTypes = 3, // Count of the value types above; not a value type.
};
218 
219 // If `FT` corresponds to a primitive FTValueType, return it.
220 static std::optional<FTValueType> ftValueTypeFromType(Type *FT) {
221   if (FT->isFloatTy())
222     return kFloat;
223   if (FT->isDoubleTy())
224     return kDouble;
225   if (FT->isX86_FP80Ty())
226     return kLongDouble;
227   return {};
228 }
229 
230 // Returns the LLVM type for an FTValueType.
231 static Type *typeFromFTValueType(FTValueType VT, LLVMContext &Context) {
232   switch (VT) {
233   case kFloat:
234     return Type::getFloatTy(Context);
235   case kDouble:
236     return Type::getDoubleTy(Context);
237   case kLongDouble:
238     return Type::getX86_FP80Ty(Context);
239   case kNumValueTypes:
240     return nullptr;
241   }
242 }
243 
244 // Returns the type name for an FTValueType.
245 static const char *typeNameFromFTValueType(FTValueType VT) {
246   switch (VT) {
247   case kFloat:
248     return "float";
249   case kDouble:
250     return "double";
251   case kLongDouble:
252     return "longdouble";
253   case kNumValueTypes:
254     return nullptr;
255   }
256 }
257 
258 // A specific mapping configuration of application type to shadow type for nsan
259 // (see -nsan-shadow-mapping flag).
260 class MappingConfig {
261 public:
262   explicit MappingConfig(LLVMContext &C) : Context(C) {
263     if (ClShadowMapping.size() != 3)
264       report_fatal_error("Invalid nsan mapping: " + Twine(ClShadowMapping));
265     unsigned ShadowTypeSizeBits[kNumValueTypes];
266     for (int VT = 0; VT < kNumValueTypes; ++VT) {
267       auto Config = ShadowTypeConfig::fromNsanTypeId(ClShadowMapping[VT]);
268       if (!Config)
269         report_fatal_error("Failed to get ShadowTypeConfig for " +
270                            Twine(ClShadowMapping[VT]));
271       const unsigned AppTypeSize =
272           typeFromFTValueType(static_cast<FTValueType>(VT), Context)
273               ->getScalarSizeInBits();
274       const unsigned ShadowTypeSize =
275           Config->getType(Context)->getScalarSizeInBits();
276       // Check that the shadow type size is at most kShadowScale times the
277       // application type size, so that shadow memory compoutations are valid.
278       if (ShadowTypeSize > kShadowScale * AppTypeSize)
279         report_fatal_error("Invalid nsan mapping f" + Twine(AppTypeSize) +
280                            "->f" + Twine(ShadowTypeSize) +
281                            ": The shadow type size should be at most " +
282                            Twine(kShadowScale) +
283                            " times the application type size");
284       ShadowTypeSizeBits[VT] = ShadowTypeSize;
285       Configs[VT] = std::move(Config);
286     }
287 
288     // Check that the mapping is monotonous. This is required because if one
289     // does an fpextend of `float->long double` in application code, nsan is
290     // going to do an fpextend of `shadow(float) -> shadow(long double)` in
291     // shadow code. This will fail in `qql` mode, since nsan would be
292     // fpextending `f128->long`, which is invalid.
293     // TODO: Relax this.
294     if (ShadowTypeSizeBits[kFloat] > ShadowTypeSizeBits[kDouble] ||
295         ShadowTypeSizeBits[kDouble] > ShadowTypeSizeBits[kLongDouble])
296       report_fatal_error("Invalid nsan mapping: { float->f" +
297                          Twine(ShadowTypeSizeBits[kFloat]) + "; double->f" +
298                          Twine(ShadowTypeSizeBits[kDouble]) +
299                          "; long double->f" +
300                          Twine(ShadowTypeSizeBits[kLongDouble]) + " }");
301   }
302 
303   const ShadowTypeConfig &byValueType(FTValueType VT) const {
304     assert(VT < FTValueType::kNumValueTypes && "invalid value type");
305     return *Configs[VT];
306   }
307 
308   // Returns the extended shadow type for a given application type.
309   Type *getExtendedFPType(Type *FT) const {
310     if (const auto VT = ftValueTypeFromType(FT))
311       return Configs[*VT]->getType(Context);
312     if (FT->isVectorTy()) {
313       auto *VecTy = cast<VectorType>(FT);
314       // TODO: add support for scalable vector types.
315       if (VecTy->isScalableTy())
316         return nullptr;
317       Type *ExtendedScalar = getExtendedFPType(VecTy->getElementType());
318       return ExtendedScalar
319                  ? VectorType::get(ExtendedScalar, VecTy->getElementCount())
320                  : nullptr;
321     }
322     return nullptr;
323   }
324 
325 private:
326   LLVMContext &Context;
327   std::unique_ptr<ShadowTypeConfig> Configs[FTValueType::kNumValueTypes];
328 };
329 
330 // The memory extents of a type specifies how many elements of a given
331 // FTValueType needs to be stored when storing this type.
332 struct MemoryExtents {
333   FTValueType ValueType;
334   uint64_t NumElts;
335 };
336 
337 static MemoryExtents getMemoryExtentsOrDie(Type *FT) {
338   if (const auto VT = ftValueTypeFromType(FT))
339     return {*VT, 1};
340   if (auto *VecTy = dyn_cast<VectorType>(FT)) {
341     const auto ScalarExtents = getMemoryExtentsOrDie(VecTy->getElementType());
342     return {ScalarExtents.ValueType,
343             ScalarExtents.NumElts * VecTy->getElementCount().getFixedValue()};
344   }
345   llvm_unreachable("invalid value type");
346 }
347 
348 // The location of a check. Passed as parameters to runtime checking functions.
349 class CheckLoc {
350 public:
351   // Creates a location that references an application memory location.
352   static CheckLoc makeStore(Value *Address) {
353     CheckLoc Result(kStore);
354     Result.Address = Address;
355     return Result;
356   }
357   static CheckLoc makeLoad(Value *Address) {
358     CheckLoc Result(kLoad);
359     Result.Address = Address;
360     return Result;
361   }
362 
363   // Creates a location that references an argument, given by id.
364   static CheckLoc makeArg(int ArgId) {
365     CheckLoc Result(kArg);
366     Result.ArgId = ArgId;
367     return Result;
368   }
369 
370   // Creates a location that references the return value of a function.
371   static CheckLoc makeRet() { return CheckLoc(kRet); }
372 
373   // Creates a location that references a vector insert.
374   static CheckLoc makeInsert() { return CheckLoc(kInsert); }
375 
376   // Returns the CheckType of location this refers to, as an integer-typed LLVM
377   // IR value.
378   Value *getType(LLVMContext &C) const {
379     return ConstantInt::get(Type::getInt32Ty(C), static_cast<int>(CheckTy));
380   }
381 
382   // Returns a CheckType-specific value representing details of the location
383   // (e.g. application address for loads or stores), as an `IntptrTy`-typed LLVM
384   // IR value.
385   Value *getValue(Type *IntptrTy, IRBuilder<> &Builder) const {
386     switch (CheckTy) {
387     case kUnknown:
388       llvm_unreachable("unknown type");
389     case kRet:
390     case kInsert:
391       return ConstantInt::get(IntptrTy, 0);
392     case kArg:
393       return ConstantInt::get(IntptrTy, ArgId);
394     case kLoad:
395     case kStore:
396       return Builder.CreatePtrToInt(Address, IntptrTy);
397     }
398   }
399 
400 private:
401   // Must be kept in sync with the runtime,
402   // see compiler-rt/lib/nsan/nsan_stats.h
403   enum CheckType {
404     kUnknown = 0,
405     kRet,
406     kArg,
407     kLoad,
408     kStore,
409     kInsert,
410   };
411   explicit CheckLoc(CheckType CheckTy) : CheckTy(CheckTy) {}
412 
413   Value *Address = nullptr;
414   const CheckType CheckTy;
415   int ArgId = -1;
416 };
417 
418 // A map of LLVM IR values to shadow LLVM IR values.
419 class ValueToShadowMap {
420 public:
421   explicit ValueToShadowMap(const MappingConfig &Config) : Config(Config) {}
422 
423   ValueToShadowMap(const ValueToShadowMap &) = delete;
424   ValueToShadowMap &operator=(const ValueToShadowMap &) = delete;
425 
426   // Sets the shadow value for a value. Asserts that the value does not already
427   // have a value.
428   void setShadow(Value &V, Value &Shadow) {
429     [[maybe_unused]] const bool Inserted = Map.try_emplace(&V, &Shadow).second;
430     LLVM_DEBUG({
431       if (!Inserted) {
432         if (auto *I = dyn_cast<Instruction>(&V))
433           errs() << I->getFunction()->getName() << ": ";
434         errs() << "duplicate shadow (" << &V << "): ";
435         V.dump();
436       }
437     });
438     assert(Inserted && "duplicate shadow");
439   }
440 
441   // Returns true if the value already has a shadow (including if the value is a
442   // constant). If true, calling getShadow() is valid.
443   bool hasShadow(Value *V) const {
444     return isa<Constant>(V) || (Map.find(V) != Map.end());
445   }
446 
447   // Returns the shadow value for a given value. Asserts that the value has
448   // a shadow value. Lazily creates shadows for constant values.
449   Value *getShadow(Value *V) const {
450     if (Constant *C = dyn_cast<Constant>(V))
451       return getShadowConstant(C);
452     return Map.find(V)->second;
453   }
454 
455   bool empty() const { return Map.empty(); }
456 
457 private:
458   // Extends a constant application value to its shadow counterpart.
459   APFloat extendConstantFP(APFloat CV, const fltSemantics &To) const {
460     bool LosesInfo = false;
461     CV.convert(To, APFloatBase::rmTowardZero, &LosesInfo);
462     return CV;
463   }
464 
465   // Returns the shadow constant for the given application constant.
466   Constant *getShadowConstant(Constant *C) const {
467     if (UndefValue *U = dyn_cast<UndefValue>(C)) {
468       return UndefValue::get(Config.getExtendedFPType(U->getType()));
469     }
470     if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
471       // Floating-point constants.
472       Type *Ty = Config.getExtendedFPType(CFP->getType());
473       return ConstantFP::get(
474           Ty, extendConstantFP(CFP->getValueAPF(), Ty->getFltSemantics()));
475     }
476     // Vector, array, or aggregate constants.
477     if (C->getType()->isVectorTy()) {
478       SmallVector<Constant *, 8> Elements;
479       for (int I = 0, E = cast<VectorType>(C->getType())
480                               ->getElementCount()
481                               .getFixedValue();
482            I < E; ++I)
483         Elements.push_back(getShadowConstant(C->getAggregateElement(I)));
484       return ConstantVector::get(Elements);
485     }
486     llvm_unreachable("unimplemented");
487   }
488 
489   const MappingConfig &Config;
490   DenseMap<Value *, Value *> Map;
491 };
492 
493 /// Instantiating NumericalStabilitySanitizer inserts the nsan runtime library
494 /// API function declarations into the module if they don't exist already.
495 /// Instantiating ensures the __nsan_init function is in the list of global
496 /// constructors for the module.
497 class NumericalStabilitySanitizer {
498 public:
499   NumericalStabilitySanitizer(Module &M);
500   bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
501 
502 private:
503   bool instrumentMemIntrinsic(MemIntrinsic *MI);
504   void maybeAddSuffixForNsanInterface(CallBase *CI);
505   bool addrPointsToConstantData(Value *Addr);
506   void maybeCreateShadowValue(Instruction &Root, const TargetLibraryInfo &TLI,
507                               ValueToShadowMap &Map);
508   Value *createShadowValueWithOperandsAvailable(Instruction &Inst,
509                                                 const TargetLibraryInfo &TLI,
510                                                 const ValueToShadowMap &Map);
511   PHINode *maybeCreateShadowPhi(PHINode &Phi, const TargetLibraryInfo &TLI);
512   void createShadowArguments(Function &F, const TargetLibraryInfo &TLI,
513                              ValueToShadowMap &Map);
514 
515   void populateShadowStack(CallBase &CI, const TargetLibraryInfo &TLI,
516                            const ValueToShadowMap &Map);
517 
518   void propagateShadowValues(Instruction &Inst, const TargetLibraryInfo &TLI,
519                              const ValueToShadowMap &Map);
520   Value *emitCheck(Value *V, Value *ShadowV, IRBuilder<> &Builder,
521                    CheckLoc Loc);
522   Value *emitCheckInternal(Value *V, Value *ShadowV, IRBuilder<> &Builder,
523                            CheckLoc Loc);
524   void emitFCmpCheck(FCmpInst &FCmp, const ValueToShadowMap &Map);
525 
526   // Value creation handlers.
527   Value *handleLoad(LoadInst &Load, Type *VT, Type *ExtendedVT);
528   Value *handleCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
529                         const TargetLibraryInfo &TLI,
530                         const ValueToShadowMap &Map, IRBuilder<> &Builder);
531   Value *maybeHandleKnownCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
532                                   const TargetLibraryInfo &TLI,
533                                   const ValueToShadowMap &Map,
534                                   IRBuilder<> &Builder);
535   Value *handleTrunc(const FPTruncInst &Trunc, Type *VT, Type *ExtendedVT,
536                      const ValueToShadowMap &Map, IRBuilder<> &Builder);
537   Value *handleExt(const FPExtInst &Ext, Type *VT, Type *ExtendedVT,
538                    const ValueToShadowMap &Map, IRBuilder<> &Builder);
539 
540   // Value propagation handlers.
541   void propagateFTStore(StoreInst &Store, Type *VT, Type *ExtendedVT,
542                         const ValueToShadowMap &Map);
543   void propagateNonFTStore(StoreInst &Store, Type *VT,
544                            const ValueToShadowMap &Map);
545 
546   const DataLayout &DL;
547   LLVMContext &Context;
548   MappingConfig Config;
549   IntegerType *IntptrTy = nullptr;
550   FunctionCallee NsanGetShadowPtrForStore[FTValueType::kNumValueTypes] = {};
551   FunctionCallee NsanGetShadowPtrForLoad[FTValueType::kNumValueTypes] = {};
552   FunctionCallee NsanCheckValue[FTValueType::kNumValueTypes] = {};
553   FunctionCallee NsanFCmpFail[FTValueType::kNumValueTypes] = {};
554   FunctionCallee NsanCopyValues;
555   FunctionCallee NsanSetValueUnknown;
556   FunctionCallee NsanGetRawShadowTypePtr;
557   FunctionCallee NsanGetRawShadowPtr;
558   GlobalValue *NsanShadowRetTag = nullptr;
559 
560   Type *NsanShadowRetType = nullptr;
561   GlobalValue *NsanShadowRetPtr = nullptr;
562 
563   GlobalValue *NsanShadowArgsTag = nullptr;
564 
565   Type *NsanShadowArgsType = nullptr;
566   GlobalValue *NsanShadowArgsPtr = nullptr;
567 
568   std::optional<Regex> CheckFunctionsFilter;
569 };
570 } // end anonymous namespace
571 
572 PreservedAnalyses
573 NumericalStabilitySanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) {
574   getOrCreateSanitizerCtorAndInitFunctions(
575       M, kNsanModuleCtorName, kNsanInitName, /*InitArgTypes=*/{},
576       /*InitArgs=*/{},
577       // This callback is invoked when the functions are created the first
578       // time. Hook them into the global ctors list in that case:
579       [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); });
580 
581   NumericalStabilitySanitizer Nsan(M);
582   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
583   for (Function &F : M)
584     Nsan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F));
585 
586   return PreservedAnalyses::none();
587 }
588 
589 static GlobalValue *createThreadLocalGV(const char *Name, Module &M, Type *Ty) {
590   return dyn_cast<GlobalValue>(M.getOrInsertGlobal(Name, Ty, [&M, Ty, Name] {
591     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
592                               nullptr, Name, nullptr,
593                               GlobalVariable::InitialExecTLSModel);
594   }));
595 }
596 
597 NumericalStabilitySanitizer::NumericalStabilitySanitizer(Module &M)
598     : DL(M.getDataLayout()), Context(M.getContext()), Config(Context) {
599   IntptrTy = DL.getIntPtrType(Context);
600   Type *PtrTy = PointerType::getUnqual(Context);
601   Type *Int32Ty = Type::getInt32Ty(Context);
602   Type *Int1Ty = Type::getInt1Ty(Context);
603   Type *VoidTy = Type::getVoidTy(Context);
604 
605   AttributeList Attr;
606   Attr = Attr.addFnAttribute(Context, Attribute::NoUnwind);
607   // Initialize the runtime values (functions and global variables).
608   for (int I = 0; I < kNumValueTypes; ++I) {
609     const FTValueType VT = static_cast<FTValueType>(I);
610     const char *VTName = typeNameFromFTValueType(VT);
611     Type *VTTy = typeFromFTValueType(VT, Context);
612 
613     // Load/store.
614     const std::string GetterPrefix =
615         std::string("__nsan_get_shadow_ptr_for_") + VTName;
616     NsanGetShadowPtrForStore[VT] = M.getOrInsertFunction(
617         GetterPrefix + "_store", Attr, PtrTy, PtrTy, IntptrTy);
618     NsanGetShadowPtrForLoad[VT] = M.getOrInsertFunction(
619         GetterPrefix + "_load", Attr, PtrTy, PtrTy, IntptrTy);
620 
621     // Check.
622     const auto &ShadowConfig = Config.byValueType(VT);
623     Type *ShadowTy = ShadowConfig.getType(Context);
624     NsanCheckValue[VT] =
625         M.getOrInsertFunction(std::string("__nsan_internal_check_") + VTName +
626                                   "_" + ShadowConfig.getNsanTypeId(),
627                               Attr, Int32Ty, VTTy, ShadowTy, Int32Ty, IntptrTy);
628     NsanFCmpFail[VT] = M.getOrInsertFunction(
629         std::string("__nsan_fcmp_fail_") + VTName + "_" +
630             ShadowConfig.getNsanTypeId(),
631         Attr, VoidTy, VTTy, VTTy, ShadowTy, ShadowTy, Int32Ty, Int1Ty, Int1Ty);
632   }
633 
634   NsanCopyValues = M.getOrInsertFunction("__nsan_copy_values", Attr, VoidTy,
635                                          PtrTy, PtrTy, IntptrTy);
636   NsanSetValueUnknown = M.getOrInsertFunction("__nsan_set_value_unknown", Attr,
637                                               VoidTy, PtrTy, IntptrTy);
638 
639   // TODO: Add attributes nofree, nosync, readnone, readonly,
640   NsanGetRawShadowTypePtr = M.getOrInsertFunction(
641       "__nsan_internal_get_raw_shadow_type_ptr", Attr, PtrTy, PtrTy);
642   NsanGetRawShadowPtr = M.getOrInsertFunction(
643       "__nsan_internal_get_raw_shadow_ptr", Attr, PtrTy, PtrTy);
644 
645   NsanShadowRetTag = createThreadLocalGV("__nsan_shadow_ret_tag", M, IntptrTy);
646 
647   NsanShadowRetType = ArrayType::get(Type::getInt8Ty(Context),
648                                      kMaxVectorWidth * kMaxShadowTypeSizeBytes);
649   NsanShadowRetPtr =
650       createThreadLocalGV("__nsan_shadow_ret_ptr", M, NsanShadowRetType);
651 
652   NsanShadowArgsTag =
653       createThreadLocalGV("__nsan_shadow_args_tag", M, IntptrTy);
654 
655   NsanShadowArgsType =
656       ArrayType::get(Type::getInt8Ty(Context),
657                      kMaxVectorWidth * kMaxNumArgs * kMaxShadowTypeSizeBytes);
658 
659   NsanShadowArgsPtr =
660       createThreadLocalGV("__nsan_shadow_args_ptr", M, NsanShadowArgsType);
661 
662   if (!ClCheckFunctionsFilter.empty()) {
663     Regex R = Regex(ClCheckFunctionsFilter);
664     std::string RegexError;
665     assert(R.isValid(RegexError));
666     CheckFunctionsFilter = std::move(R);
667   }
668 }
669 
670 // Returns true if the given LLVM Value points to constant data (typically, a
671 // global variable reference).
672 bool NumericalStabilitySanitizer::addrPointsToConstantData(Value *Addr) {
673   // If this is a GEP, just analyze its pointer operand.
674   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr))
675     Addr = GEP->getPointerOperand();
676 
677   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr))
678     return GV->isConstant();
679   return false;
680 }
681 
682 // This instruments the function entry to create shadow arguments.
683 // Pseudocode:
684 //   if (this_fn_ptr == __nsan_shadow_args_tag) {
685 //     s(arg0) = LOAD<sizeof(arg0)>(__nsan_shadow_args);
686 //     s(arg1) = LOAD<sizeof(arg1)>(__nsan_shadow_args + sizeof(arg0));
687 //     ...
688 //     __nsan_shadow_args_tag = 0;
689 //   } else {
690 //     s(arg0) = fext(arg0);
691 //     s(arg1) = fext(arg1);
692 //     ...
693 //   }
694 void NumericalStabilitySanitizer::createShadowArguments(
695     Function &F, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
696   assert(!F.getIntrinsicID() && "found a definition of an intrinsic");
697 
698   // Do not bother if there are no FP args.
699   if (all_of(F.args(), [this](const Argument &Arg) {
700         return Config.getExtendedFPType(Arg.getType()) == nullptr;
701       }))
702     return;
703 
704   IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHI());
705   // The function has shadow args if the shadow args tag matches the function
706   // address.
707   Value *HasShadowArgs = Builder.CreateICmpEQ(
708       Builder.CreateLoad(IntptrTy, NsanShadowArgsTag, /*isVolatile=*/false),
709       Builder.CreatePtrToInt(&F, IntptrTy));
710 
711   unsigned ShadowArgsOffsetBytes = 0;
712   for (Argument &Arg : F.args()) {
713     Type *VT = Arg.getType();
714     Type *ExtendedVT = Config.getExtendedFPType(VT);
715     if (ExtendedVT == nullptr)
716       continue; // Not an FT value.
717     Value *L = Builder.CreateAlignedLoad(
718         ExtendedVT,
719         Builder.CreateConstGEP2_64(NsanShadowArgsType, NsanShadowArgsPtr, 0,
720                                    ShadowArgsOffsetBytes),
721         Align(1), /*isVolatile=*/false);
722     Value *Shadow = Builder.CreateSelect(HasShadowArgs, L,
723                                          Builder.CreateFPExt(&Arg, ExtendedVT));
724     Map.setShadow(Arg, *Shadow);
725     TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT);
726     assert(!SlotSize.isScalable() && "unsupported");
727     ShadowArgsOffsetBytes += SlotSize;
728   }
729   Builder.CreateStore(ConstantInt::get(IntptrTy, 0), NsanShadowArgsTag);
730 }
731 
732 // Returns true if the instrumentation should emit code to check arguments
733 // before a function call.
734 static bool shouldCheckArgs(CallBase &CI, const TargetLibraryInfo &TLI,
735                             const std::optional<Regex> &CheckFunctionsFilter) {
736 
737   Function *Fn = CI.getCalledFunction();
738 
739   if (CheckFunctionsFilter) {
740     // Skip checking args of indirect calls.
741     if (Fn == nullptr)
742       return false;
743     if (CheckFunctionsFilter->match(Fn->getName()))
744       return true;
745     return false;
746   }
747 
748   if (Fn == nullptr)
749     return true; // Always check args of indirect calls.
750 
751   // Never check nsan functions, the user called them for a reason.
752   if (Fn->getName().starts_with("__nsan_"))
753     return false;
754 
755   const auto ID = Fn->getIntrinsicID();
756   LibFunc LFunc = LibFunc::NumLibFuncs;
757   // Always check args of unknown functions.
758   if (ID == Intrinsic::ID() && !TLI.getLibFunc(*Fn, LFunc))
759     return true;
760 
761   // Do not check args of an `fabs` call that is used for a comparison.
762   // This is typically used for `fabs(a-b) < tolerance`, where what matters is
763   // the result of the comparison, which is already caught be the fcmp checks.
764   if (ID == Intrinsic::fabs || LFunc == LibFunc_fabsf ||
765       LFunc == LibFunc_fabs || LFunc == LibFunc_fabsl)
766     for (const auto &U : CI.users())
767       if (isa<CmpInst>(U))
768         return false;
769 
770   return true; // Default is check.
771 }
772 
773 // Populates the shadow call stack (which contains shadow values for every
774 // floating-point parameter to the function).
775 void NumericalStabilitySanitizer::populateShadowStack(
776     CallBase &CI, const TargetLibraryInfo &TLI, const ValueToShadowMap &Map) {
777   // Do not create a shadow stack for inline asm.
778   if (CI.isInlineAsm())
779     return;
780 
781   // Do not bother if there are no FP args.
782   if (all_of(CI.operands(), [this](const Value *Arg) {
783         return Config.getExtendedFPType(Arg->getType()) == nullptr;
784       }))
785     return;
786 
787   IRBuilder<> Builder(&CI);
788   SmallVector<Value *, 8> ArgShadows;
789   const bool ShouldCheckArgs = shouldCheckArgs(CI, TLI, CheckFunctionsFilter);
790   for (auto [ArgIdx, Arg] : enumerate(CI.operands())) {
791     if (Config.getExtendedFPType(Arg->getType()) == nullptr)
792       continue; // Not an FT value.
793     Value *ArgShadow = Map.getShadow(Arg);
794     ArgShadows.push_back(ShouldCheckArgs ? emitCheck(Arg, ArgShadow, Builder,
795                                                      CheckLoc::makeArg(ArgIdx))
796                                          : ArgShadow);
797   }
798 
799   // Do not create shadow stacks for intrinsics/known lib funcs.
800   if (Function *Fn = CI.getCalledFunction()) {
801     LibFunc LFunc;
802     if (Fn->isIntrinsic() || TLI.getLibFunc(*Fn, LFunc))
803       return;
804   }
805 
806   // Set the shadow stack tag.
807   Builder.CreateStore(CI.getCalledOperand(), NsanShadowArgsTag);
808   TypeSize ShadowArgsOffsetBytes = TypeSize::getFixed(0);
809 
810   unsigned ShadowArgId = 0;
811   for (const Value *Arg : CI.operands()) {
812     Type *VT = Arg->getType();
813     Type *ExtendedVT = Config.getExtendedFPType(VT);
814     if (ExtendedVT == nullptr)
815       continue; // Not an FT value.
816     Builder.CreateAlignedStore(
817         ArgShadows[ShadowArgId++],
818         Builder.CreateConstGEP2_64(NsanShadowArgsType, NsanShadowArgsPtr, 0,
819                                    ShadowArgsOffsetBytes),
820         Align(1), /*isVolatile=*/false);
821     TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT);
822     assert(!SlotSize.isScalable() && "unsupported");
823     ShadowArgsOffsetBytes += SlotSize;
824   }
825 }
826 
// Result codes used by emitCheckInternal() / emitCheck(). A code tells the
// instrumented program whether the computation should continue with the
// shadow value or resume by re-extending (fpext) the original value.
// emitCheck() compares the code against ResumeFromValue to pick between the
// two.
enum class ContinuationType { // Keep in sync with runtime.
  // Keep using the existing shadow value.
  ContinueWithShadow = 0,
  // Discard the shadow and restart from the extended original value.
  ResumeFromValue = 1,
};
834 
835 Value *NumericalStabilitySanitizer::emitCheckInternal(Value *V, Value *ShadowV,
836                                                       IRBuilder<> &Builder,
837                                                       CheckLoc Loc) {
838   // Do not emit checks for constant values, this is redundant.
839   if (isa<Constant>(V))
840     return ConstantInt::get(
841         Builder.getInt32Ty(),
842         static_cast<int>(ContinuationType::ContinueWithShadow));
843 
844   Type *Ty = V->getType();
845   if (const auto VT = ftValueTypeFromType(Ty))
846     return Builder.CreateCall(
847         NsanCheckValue[*VT],
848         {V, ShadowV, Loc.getType(Context), Loc.getValue(IntptrTy, Builder)});
849 
850   if (Ty->isVectorTy()) {
851     auto *VecTy = cast<VectorType>(Ty);
852     // We currently skip scalable vector types in MappingConfig,
853     // thus we should not encounter any such types here.
854     assert(!VecTy->isScalableTy() &&
855            "Scalable vector types are not supported yet");
856     Value *CheckResult = nullptr;
857     for (int I = 0, E = VecTy->getElementCount().getFixedValue(); I < E; ++I) {
858       // We resume if any element resumes. Another option would be to create a
859       // vector shuffle with the array of ContinueWithShadow, but that is too
860       // complex.
861       Value *ExtractV = Builder.CreateExtractElement(V, I);
862       Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I);
863       Value *ComponentCheckResult =
864           emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
865       CheckResult = CheckResult
866                         ? Builder.CreateOr(CheckResult, ComponentCheckResult)
867                         : ComponentCheckResult;
868     }
869     return CheckResult;
870   }
871   if (Ty->isArrayTy()) {
872     Value *CheckResult = nullptr;
873     for (auto I : seq(Ty->getArrayNumElements())) {
874       Value *ExtractV = Builder.CreateExtractElement(V, I);
875       Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I);
876       Value *ComponentCheckResult =
877           emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
878       CheckResult = CheckResult
879                         ? Builder.CreateOr(CheckResult, ComponentCheckResult)
880                         : ComponentCheckResult;
881     }
882     return CheckResult;
883   }
884   if (Ty->isStructTy()) {
885     Value *CheckResult = nullptr;
886     for (auto I : seq(Ty->getStructNumElements())) {
887       if (Config.getExtendedFPType(Ty->getStructElementType(I)) == nullptr)
888         continue; // Only check FT values.
889       Value *ExtractV = Builder.CreateExtractValue(V, I);
890       Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I);
891       Value *ComponentCheckResult =
892           emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
893       CheckResult = CheckResult
894                         ? Builder.CreateOr(CheckResult, ComponentCheckResult)
895                         : ComponentCheckResult;
896     }
897     if (!CheckResult)
898       return ConstantInt::get(
899           Builder.getInt32Ty(),
900           static_cast<int>(ContinuationType::ContinueWithShadow));
901     return CheckResult;
902   }
903 
904   llvm_unreachable("not implemented");
905 }
906 
907 // Inserts a runtime check of V against its shadow value ShadowV.
908 // We check values whenever they escape: on return, call, stores, and
909 // insertvalue.
910 // Returns the shadow value that should be used to continue the computations,
911 // depending on the answer from the runtime.
912 // TODO: Should we check on select ? phi ?
913 Value *NumericalStabilitySanitizer::emitCheck(Value *V, Value *ShadowV,
914                                               IRBuilder<> &Builder,
915                                               CheckLoc Loc) {
916   // Do not emit checks for constant values, this is redundant.
917   if (isa<Constant>(V))
918     return ShadowV;
919 
920   if (Instruction *Inst = dyn_cast<Instruction>(V)) {
921     Function *F = Inst->getFunction();
922     if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName())) {
923       return ShadowV;
924     }
925   }
926 
927   Value *CheckResult = emitCheckInternal(V, ShadowV, Builder, Loc);
928   Value *ICmpEQ = Builder.CreateICmpEQ(
929       CheckResult,
930       ConstantInt::get(Builder.getInt32Ty(),
931                        static_cast<int>(ContinuationType::ResumeFromValue)));
932   return Builder.CreateSelect(
933       ICmpEQ, Builder.CreateFPExt(V, Config.getExtendedFPType(V->getType())),
934       ShadowV);
935 }
936 
937 // Inserts a check that fcmp on shadow values are consistent with that on base
938 // values.
939 void NumericalStabilitySanitizer::emitFCmpCheck(FCmpInst &FCmp,
940                                                 const ValueToShadowMap &Map) {
941   if (!ClInstrumentFCmp)
942     return;
943 
944   Function *F = FCmp.getFunction();
945   if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName()))
946     return;
947 
948   Value *LHS = FCmp.getOperand(0);
949   if (Config.getExtendedFPType(LHS->getType()) == nullptr)
950     return;
951   Value *RHS = FCmp.getOperand(1);
952 
953   // Split the basic block. On mismatch, we'll jump to the new basic block with
954   // a call to the runtime for error reporting.
955   BasicBlock *FCmpBB = FCmp.getParent();
956   BasicBlock *NextBB = FCmpBB->splitBasicBlock(FCmp.getNextNode());
957   // Remove the newly created terminator unconditional branch.
958   FCmpBB->back().eraseFromParent();
959   BasicBlock *FailBB =
960       BasicBlock::Create(Context, "", FCmpBB->getParent(), NextBB);
961 
962   // Create the shadow fcmp and comparison between the fcmps.
963   IRBuilder<> FCmpBuilder(FCmpBB);
964   FCmpBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
965   Value *ShadowLHS = Map.getShadow(LHS);
966   Value *ShadowRHS = Map.getShadow(RHS);
967   // See comment on ClTruncateFCmpEq.
968   if (FCmp.isEquality() && ClTruncateFCmpEq) {
969     Type *Ty = ShadowLHS->getType();
970     ShadowLHS = FCmpBuilder.CreateFPExt(
971         FCmpBuilder.CreateFPTrunc(ShadowLHS, LHS->getType()), Ty);
972     ShadowRHS = FCmpBuilder.CreateFPExt(
973         FCmpBuilder.CreateFPTrunc(ShadowRHS, RHS->getType()), Ty);
974   }
975   Value *ShadowFCmp =
976       FCmpBuilder.CreateFCmp(FCmp.getPredicate(), ShadowLHS, ShadowRHS);
977   Value *OriginalAndShadowFcmpMatch =
978       FCmpBuilder.CreateICmpEQ(&FCmp, ShadowFCmp);
979 
980   if (OriginalAndShadowFcmpMatch->getType()->isVectorTy()) {
981     // If we have a vector type, `OriginalAndShadowFcmpMatch` is a vector of i1,
982     // where an element is true if the corresponding elements in original and
983     // shadow are the same. We want all elements to be 1.
984     OriginalAndShadowFcmpMatch =
985         FCmpBuilder.CreateAndReduce(OriginalAndShadowFcmpMatch);
986   }
987 
988   // Use MDBuilder(*C).createLikelyBranchWeights() because "match" is the common
989   // case.
990   FCmpBuilder.CreateCondBr(OriginalAndShadowFcmpMatch, NextBB, FailBB,
991                            MDBuilder(Context).createLikelyBranchWeights());
992 
993   // Fill in FailBB.
994   IRBuilder<> FailBuilder(FailBB);
995   FailBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
996 
997   const auto EmitFailCall = [this, &FCmp, &FCmpBuilder,
998                              &FailBuilder](Value *L, Value *R, Value *ShadowL,
999                                            Value *ShadowR, Value *Result,
1000                                            Value *ShadowResult) {
1001     Type *FT = L->getType();
1002     FunctionCallee *Callee = nullptr;
1003     if (FT->isFloatTy()) {
1004       Callee = &(NsanFCmpFail[kFloat]);
1005     } else if (FT->isDoubleTy()) {
1006       Callee = &(NsanFCmpFail[kDouble]);
1007     } else if (FT->isX86_FP80Ty()) {
1008       // TODO: make NsanFCmpFailLongDouble work.
1009       Callee = &(NsanFCmpFail[kDouble]);
1010       L = FailBuilder.CreateFPTrunc(L, Type::getDoubleTy(Context));
1011       R = FailBuilder.CreateFPTrunc(L, Type::getDoubleTy(Context));
1012     } else {
1013       llvm_unreachable("not implemented");
1014     }
1015     FailBuilder.CreateCall(*Callee, {L, R, ShadowL, ShadowR,
1016                                      ConstantInt::get(FCmpBuilder.getInt32Ty(),
1017                                                       FCmp.getPredicate()),
1018                                      Result, ShadowResult});
1019   };
1020   if (LHS->getType()->isVectorTy()) {
1021     for (int I = 0, E = cast<VectorType>(LHS->getType())
1022                             ->getElementCount()
1023                             .getFixedValue();
1024          I < E; ++I) {
1025       Value *ExtractLHS = FailBuilder.CreateExtractElement(LHS, I);
1026       Value *ExtractRHS = FailBuilder.CreateExtractElement(RHS, I);
1027       Value *ExtractShaodwLHS = FailBuilder.CreateExtractElement(ShadowLHS, I);
1028       Value *ExtractShaodwRHS = FailBuilder.CreateExtractElement(ShadowRHS, I);
1029       Value *ExtractFCmp = FailBuilder.CreateExtractElement(&FCmp, I);
1030       Value *ExtractShadowFCmp =
1031           FailBuilder.CreateExtractElement(ShadowFCmp, I);
1032       EmitFailCall(ExtractLHS, ExtractRHS, ExtractShaodwLHS, ExtractShaodwRHS,
1033                    ExtractFCmp, ExtractShadowFCmp);
1034     }
1035   } else {
1036     EmitFailCall(LHS, RHS, ShadowLHS, ShadowRHS, &FCmp, ShadowFCmp);
1037   }
1038   FailBuilder.CreateBr(NextBB);
1039 
1040   ++NumInstrumentedFCmp;
1041 }
1042 
1043 // Creates a shadow phi value for any phi that defines a value of FT type.
1044 PHINode *NumericalStabilitySanitizer::maybeCreateShadowPhi(
1045     PHINode &Phi, const TargetLibraryInfo &TLI) {
1046   Type *VT = Phi.getType();
1047   Type *ExtendedVT = Config.getExtendedFPType(VT);
1048   if (ExtendedVT == nullptr)
1049     return nullptr; // Not an FT value.
1050   // The phi operands are shadow values and are not available when the phi is
1051   // created. They will be populated in a final phase, once all shadow values
1052   // have been created.
1053   PHINode *Shadow = PHINode::Create(ExtendedVT, Phi.getNumIncomingValues());
1054   Shadow->insertAfter(&Phi);
1055   return Shadow;
1056 }
1057 
1058 Value *NumericalStabilitySanitizer::handleLoad(LoadInst &Load, Type *VT,
1059                                                Type *ExtendedVT) {
1060   IRBuilder<> Builder(Load.getNextNode());
1061   Builder.SetCurrentDebugLocation(Load.getDebugLoc());
1062   if (addrPointsToConstantData(Load.getPointerOperand())) {
1063     // No need to look into the shadow memory, the value is a constant. Just
1064     // convert from FT to 2FT.
1065     return Builder.CreateFPExt(&Load, ExtendedVT);
1066   }
1067 
1068   // if (%shadowptr == &)
1069   //    %shadow = fpext %v
1070   // else
1071   //    %shadow = load (ptrcast %shadow_ptr))
1072   // Considered options here:
1073   //  - Have `NsanGetShadowPtrForLoad` return a fixed address
1074   //    &__nsan_unknown_value_shadow_address that is valid to load from, and
1075   //    use a select. This has the advantage that the generated IR is simpler.
1076   //  - Have `NsanGetShadowPtrForLoad` return nullptr.  Because `select` does
1077   //    not short-circuit, dereferencing the returned pointer is no longer an
1078   //    option, have to split and create a separate basic block. This has the
1079   //    advantage of being easier to debug because it crashes if we ever mess
1080   //    up.
1081 
1082   const auto Extents = getMemoryExtentsOrDie(VT);
1083   Value *ShadowPtr = Builder.CreateCall(
1084       NsanGetShadowPtrForLoad[Extents.ValueType],
1085       {Load.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
1086   ++NumInstrumentedFTLoads;
1087 
1088   // Split the basic block.
1089   BasicBlock *LoadBB = Load.getParent();
1090   BasicBlock *NextBB = LoadBB->splitBasicBlock(Builder.GetInsertPoint());
1091   // Create the two options for creating the shadow value.
1092   BasicBlock *ShadowLoadBB =
1093       BasicBlock::Create(Context, "", LoadBB->getParent(), NextBB);
1094   BasicBlock *FExtBB =
1095       BasicBlock::Create(Context, "", LoadBB->getParent(), NextBB);
1096 
1097   // Replace the newly created terminator unconditional branch by a conditional
1098   // branch to one of the options.
1099   {
1100     LoadBB->back().eraseFromParent();
1101     IRBuilder<> LoadBBBuilder(LoadBB); // The old builder has been invalidated.
1102     LoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1103     LoadBBBuilder.CreateCondBr(LoadBBBuilder.CreateIsNull(ShadowPtr), FExtBB,
1104                                ShadowLoadBB);
1105   }
1106 
1107   // Fill in ShadowLoadBB.
1108   IRBuilder<> ShadowLoadBBBuilder(ShadowLoadBB);
1109   ShadowLoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1110   Value *ShadowLoad = ShadowLoadBBBuilder.CreateAlignedLoad(
1111       ExtendedVT, ShadowPtr, Align(1), Load.isVolatile());
1112   if (ClCheckLoads) {
1113     ShadowLoad = emitCheck(&Load, ShadowLoad, ShadowLoadBBBuilder,
1114                            CheckLoc::makeLoad(Load.getPointerOperand()));
1115   }
1116   ShadowLoadBBBuilder.CreateBr(NextBB);
1117 
1118   // Fill in FExtBB.
1119   IRBuilder<> FExtBBBuilder(FExtBB);
1120   FExtBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1121   Value *FExt = FExtBBBuilder.CreateFPExt(&Load, ExtendedVT);
1122   FExtBBBuilder.CreateBr(NextBB);
1123 
1124   // The shadow value come from any of the options.
1125   IRBuilder<> NextBBBuilder(&*NextBB->begin());
1126   NextBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1127   PHINode *ShadowPhi = NextBBBuilder.CreatePHI(ExtendedVT, 2);
1128   ShadowPhi->addIncoming(ShadowLoad, ShadowLoadBB);
1129   ShadowPhi->addIncoming(FExt, FExtBB);
1130   return ShadowPhi;
1131 }
1132 
1133 Value *NumericalStabilitySanitizer::handleTrunc(const FPTruncInst &Trunc,
1134                                                 Type *VT, Type *ExtendedVT,
1135                                                 const ValueToShadowMap &Map,
1136                                                 IRBuilder<> &Builder) {
1137   Value *OrigSource = Trunc.getOperand(0);
1138   Type *OrigSourceTy = OrigSource->getType();
1139   Type *ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy);
1140 
1141   // When truncating:
1142   //  - (A) If the source has a shadow, we truncate from the shadow, else we
1143   //    truncate from the original source.
1144   //  - (B) If the shadow of the source is larger than the shadow of the dest,
1145   //    we still need a truncate. Else, the shadow of the source is the same
1146   //    type as the shadow of the dest (because mappings are non-decreasing), so
1147   //   we don't need to emit a truncate.
1148   // Examples,
1149   //   with a mapping of {f32->f64;f64->f80;f80->f128}
1150   //     fptrunc double   %1 to float     ->  fptrunc x86_fp80 s(%1) to double
1151   //     fptrunc x86_fp80 %1 to float     ->  fptrunc fp128    s(%1) to double
1152   //     fptrunc fp128    %1 to float     ->  fptrunc fp128    %1    to double
1153   //     fptrunc x86_fp80 %1 to double    ->  x86_fp80 s(%1)
1154   //     fptrunc fp128    %1 to double    ->  fptrunc fp128 %1 to x86_fp80
1155   //     fptrunc fp128    %1 to x86_fp80  ->  fp128 %1
1156   //   with a mapping of {f32->f64;f64->f128;f80->f128}
1157   //     fptrunc double   %1 to float     ->  fptrunc fp128    s(%1) to double
1158   //     fptrunc x86_fp80 %1 to float     ->  fptrunc fp128    s(%1) to double
1159   //     fptrunc fp128    %1 to float     ->  fptrunc fp128    %1    to double
1160   //     fptrunc x86_fp80 %1 to double    ->  fp128 %1
1161   //     fptrunc fp128    %1 to double    ->  fp128 %1
1162   //     fptrunc fp128    %1 to x86_fp80  ->  fp128 %1
1163   //   with a mapping of {f32->f32;f64->f32;f80->f64}
1164   //     fptrunc double   %1 to float     ->  float s(%1)
1165   //     fptrunc x86_fp80 %1 to float     ->  fptrunc double    s(%1) to float
1166   //     fptrunc fp128    %1 to float     ->  fptrunc fp128     %1    to float
1167   //     fptrunc x86_fp80 %1 to double    ->  fptrunc double    s(%1) to float
1168   //     fptrunc fp128    %1 to double    ->  fptrunc fp128     %1    to float
1169   //     fptrunc fp128    %1 to x86_fp80  ->  fptrunc fp128     %1    to double
1170 
1171   // See (A) above.
1172   Value *Source = ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource;
1173   Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
1174   // See (B) above.
1175   if (SourceTy == ExtendedVT)
1176     return Source;
1177 
1178   return Builder.CreateFPTrunc(Source, ExtendedVT);
1179 }
1180 
1181 Value *NumericalStabilitySanitizer::handleExt(const FPExtInst &Ext, Type *VT,
1182                                               Type *ExtendedVT,
1183                                               const ValueToShadowMap &Map,
1184                                               IRBuilder<> &Builder) {
1185   Value *OrigSource = Ext.getOperand(0);
1186   Type *OrigSourceTy = OrigSource->getType();
1187   Type *ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy);
1188   // When extending:
1189   //  - (A) If the source has a shadow, we extend from the shadow, else we
1190   //    extend from the original source.
1191   //  - (B) If the shadow of the dest is larger than the shadow of the source,
1192   //    we still need an extend. Else, the shadow of the source is the same
1193   //    type as the shadow of the dest (because mappings are non-decreasing), so
1194   //    we don't need to emit an extend.
1195   // Examples,
1196   //   with a mapping of {f32->f64;f64->f80;f80->f128}
1197   //     fpext half    %1 to float     ->  fpext half     %1    to double
1198   //     fpext half    %1 to double    ->  fpext half     %1    to x86_fp80
1199   //     fpext half    %1 to x86_fp80  ->  fpext half     %1    to fp128
1200   //     fpext float   %1 to double    ->  double s(%1)
1201   //     fpext float   %1 to x86_fp80  ->  fpext double   s(%1) to fp128
1202   //     fpext double  %1 to x86_fp80  ->  fpext x86_fp80 s(%1) to fp128
1203   //   with a mapping of {f32->f64;f64->f128;f80->f128}
1204   //     fpext half    %1 to float     ->  fpext half     %1    to double
1205   //     fpext half    %1 to double    ->  fpext half     %1    to fp128
1206   //     fpext half    %1 to x86_fp80  ->  fpext half     %1    to fp128
1207   //     fpext float   %1 to double    ->  fpext double   s(%1) to fp128
1208   //     fpext float   %1 to x86_fp80  ->  fpext double   s(%1) to fp128
1209   //     fpext double  %1 to x86_fp80  ->  fp128 s(%1)
1210   //   with a mapping of {f32->f32;f64->f32;f80->f64}
1211   //     fpext half    %1 to float     ->  fpext half     %1    to float
1212   //     fpext half    %1 to double    ->  fpext half     %1    to float
1213   //     fpext half    %1 to x86_fp80  ->  fpext half     %1    to double
1214   //     fpext float   %1 to double    ->  s(%1)
1215   //     fpext float   %1 to x86_fp80  ->  fpext float    s(%1) to double
1216   //     fpext double  %1 to x86_fp80  ->  fpext float    s(%1) to double
1217 
1218   // See (A) above.
1219   Value *Source = ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource;
1220   Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
1221   // See (B) above.
1222   if (SourceTy == ExtendedVT)
1223     return Source;
1224 
1225   return Builder.CreateFPExt(Source, ExtendedVT);
1226 }
1227 
1228 namespace {
1229 // TODO: This should be tablegen-ed.
1230 struct KnownIntrinsic {
1231   struct WidenedIntrinsic {
1232     const char *NarrowName;
1233     Intrinsic::ID ID; // wide id.
1234     using FnTypeFactory = FunctionType *(*)(LLVMContext &);
1235     FnTypeFactory MakeFnTy;
1236   };
1237 
1238   static const char *get(LibFunc LFunc);
1239 
1240   // Given an intrinsic with an `FT` argument, try to find a wider intrinsic
1241   // that applies the same operation on the shadow argument.
1242   // Options are:
1243   //  - pass in the ID and full function type,
1244   //  - pass in the name, which includes the function type through mangling.
1245   static const WidenedIntrinsic *widen(StringRef Name);
1246 
1247 private:
1248   struct LFEntry {
1249     LibFunc LFunc;
1250     const char *IntrinsicName;
1251   };
1252   static const LFEntry kLibfuncIntrinsics[];
1253 
1254   static const WidenedIntrinsic kWidenedIntrinsics[];
1255 };
1256 } // namespace
1257 
1258 static FunctionType *makeDoubleDouble(LLVMContext &C) {
1259   return FunctionType::get(Type::getDoubleTy(C), {Type::getDoubleTy(C)}, false);
1260 }
1261 
1262 static FunctionType *makeX86FP80X86FP80(LLVMContext &C) {
1263   return FunctionType::get(Type::getX86_FP80Ty(C), {Type::getX86_FP80Ty(C)},
1264                            false);
1265 }
1266 
1267 static FunctionType *makeDoubleDoubleI32(LLVMContext &C) {
1268   return FunctionType::get(Type::getDoubleTy(C),
1269                            {Type::getDoubleTy(C), Type::getInt32Ty(C)}, false);
1270 }
1271 
1272 static FunctionType *makeX86FP80X86FP80I32(LLVMContext &C) {
1273   return FunctionType::get(Type::getX86_FP80Ty(C),
1274                            {Type::getX86_FP80Ty(C), Type::getInt32Ty(C)},
1275                            false);
1276 }
1277 
1278 static FunctionType *makeDoubleDoubleDouble(LLVMContext &C) {
1279   return FunctionType::get(Type::getDoubleTy(C),
1280                            {Type::getDoubleTy(C), Type::getDoubleTy(C)}, false);
1281 }
1282 
1283 static FunctionType *makeX86FP80X86FP80X86FP80(LLVMContext &C) {
1284   return FunctionType::get(Type::getX86_FP80Ty(C),
1285                            {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
1286                            false);
1287 }
1288 
1289 static FunctionType *makeDoubleDoubleDoubleDouble(LLVMContext &C) {
1290   return FunctionType::get(
1291       Type::getDoubleTy(C),
1292       {Type::getDoubleTy(C), Type::getDoubleTy(C), Type::getDoubleTy(C)},
1293       false);
1294 }
1295 
1296 static FunctionType *makeX86FP80X86FP80X86FP80X86FP80(LLVMContext &C) {
1297   return FunctionType::get(
1298       Type::getX86_FP80Ty(C),
1299       {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
1300       false);
1301 }
1302 
1303 const KnownIntrinsic::WidenedIntrinsic KnownIntrinsic::kWidenedIntrinsics[] = {
1304     // TODO: Right now we ignore vector intrinsics.
1305     // This is hard because we have to model the semantics of the intrinsics,
1306     // e.g. llvm.x86.sse2.min.sd means extract first element, min, insert back.
1307     // Intrinsics that take any non-vector FT types:
1308     // NOTE: Right now because of
1309     // https://github.com/llvm/llvm-project/issues/44744
1310     // for f128 we need to use makeX86FP80X86FP80 (go to a lower precision and
1311     // come back).
1312     {"llvm.sqrt.f32", Intrinsic::sqrt, makeDoubleDouble},
1313     {"llvm.sqrt.f64", Intrinsic::sqrt, makeX86FP80X86FP80},
1314     {"llvm.sqrt.f80", Intrinsic::sqrt, makeX86FP80X86FP80},
1315     {"llvm.powi.f32", Intrinsic::powi, makeDoubleDoubleI32},
1316     {"llvm.powi.f64", Intrinsic::powi, makeX86FP80X86FP80I32},
1317     {"llvm.powi.f80", Intrinsic::powi, makeX86FP80X86FP80I32},
1318     {"llvm.sin.f32", Intrinsic::sin, makeDoubleDouble},
1319     {"llvm.sin.f64", Intrinsic::sin, makeX86FP80X86FP80},
1320     {"llvm.sin.f80", Intrinsic::sin, makeX86FP80X86FP80},
1321     {"llvm.cos.f32", Intrinsic::cos, makeDoubleDouble},
1322     {"llvm.cos.f64", Intrinsic::cos, makeX86FP80X86FP80},
1323     {"llvm.cos.f80", Intrinsic::cos, makeX86FP80X86FP80},
1324     {"llvm.pow.f32", Intrinsic::pow, makeDoubleDoubleDouble},
1325     {"llvm.pow.f64", Intrinsic::pow, makeX86FP80X86FP80X86FP80},
1326     {"llvm.pow.f80", Intrinsic::pow, makeX86FP80X86FP80X86FP80},
1327     {"llvm.exp.f32", Intrinsic::exp, makeDoubleDouble},
1328     {"llvm.exp.f64", Intrinsic::exp, makeX86FP80X86FP80},
1329     {"llvm.exp.f80", Intrinsic::exp, makeX86FP80X86FP80},
1330     {"llvm.exp2.f32", Intrinsic::exp2, makeDoubleDouble},
1331     {"llvm.exp2.f64", Intrinsic::exp2, makeX86FP80X86FP80},
1332     {"llvm.exp2.f80", Intrinsic::exp2, makeX86FP80X86FP80},
1333     {"llvm.log.f32", Intrinsic::log, makeDoubleDouble},
1334     {"llvm.log.f64", Intrinsic::log, makeX86FP80X86FP80},
1335     {"llvm.log.f80", Intrinsic::log, makeX86FP80X86FP80},
1336     {"llvm.log10.f32", Intrinsic::log10, makeDoubleDouble},
1337     {"llvm.log10.f64", Intrinsic::log10, makeX86FP80X86FP80},
1338     {"llvm.log10.f80", Intrinsic::log10, makeX86FP80X86FP80},
1339     {"llvm.log2.f32", Intrinsic::log2, makeDoubleDouble},
1340     {"llvm.log2.f64", Intrinsic::log2, makeX86FP80X86FP80},
1341     {"llvm.log2.f80", Intrinsic::log2, makeX86FP80X86FP80},
1342     {"llvm.fma.f32", Intrinsic::fma, makeDoubleDoubleDoubleDouble},
1343 
1344     {"llvm.fmuladd.f32", Intrinsic::fmuladd, makeDoubleDoubleDoubleDouble},
1345 
1346     {"llvm.fma.f64", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1347 
1348     {"llvm.fmuladd.f64", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1349 
1350     {"llvm.fma.f80", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1351     {"llvm.fabs.f32", Intrinsic::fabs, makeDoubleDouble},
1352     {"llvm.fabs.f64", Intrinsic::fabs, makeX86FP80X86FP80},
1353     {"llvm.fabs.f80", Intrinsic::fabs, makeX86FP80X86FP80},
1354     {"llvm.minnum.f32", Intrinsic::minnum, makeDoubleDoubleDouble},
1355     {"llvm.minnum.f64", Intrinsic::minnum, makeX86FP80X86FP80X86FP80},
1356     {"llvm.minnum.f80", Intrinsic::minnum, makeX86FP80X86FP80X86FP80},
1357     {"llvm.maxnum.f32", Intrinsic::maxnum, makeDoubleDoubleDouble},
1358     {"llvm.maxnum.f64", Intrinsic::maxnum, makeX86FP80X86FP80X86FP80},
1359     {"llvm.maxnum.f80", Intrinsic::maxnum, makeX86FP80X86FP80X86FP80},
1360     {"llvm.minimum.f32", Intrinsic::minimum, makeDoubleDoubleDouble},
1361     {"llvm.minimum.f64", Intrinsic::minimum, makeX86FP80X86FP80X86FP80},
1362     {"llvm.minimum.f80", Intrinsic::minimum, makeX86FP80X86FP80X86FP80},
1363     {"llvm.maximum.f32", Intrinsic::maximum, makeDoubleDoubleDouble},
1364     {"llvm.maximum.f64", Intrinsic::maximum, makeX86FP80X86FP80X86FP80},
1365     {"llvm.maximum.f80", Intrinsic::maximum, makeX86FP80X86FP80X86FP80},
1366     {"llvm.copysign.f32", Intrinsic::copysign, makeDoubleDoubleDouble},
1367     {"llvm.copysign.f64", Intrinsic::copysign, makeX86FP80X86FP80X86FP80},
1368     {"llvm.copysign.f80", Intrinsic::copysign, makeX86FP80X86FP80X86FP80},
1369     {"llvm.floor.f32", Intrinsic::floor, makeDoubleDouble},
1370     {"llvm.floor.f64", Intrinsic::floor, makeX86FP80X86FP80},
1371     {"llvm.floor.f80", Intrinsic::floor, makeX86FP80X86FP80},
1372     {"llvm.ceil.f32", Intrinsic::ceil, makeDoubleDouble},
1373     {"llvm.ceil.f64", Intrinsic::ceil, makeX86FP80X86FP80},
1374     {"llvm.ceil.f80", Intrinsic::ceil, makeX86FP80X86FP80},
1375     {"llvm.trunc.f32", Intrinsic::trunc, makeDoubleDouble},
1376     {"llvm.trunc.f64", Intrinsic::trunc, makeX86FP80X86FP80},
1377     {"llvm.trunc.f80", Intrinsic::trunc, makeX86FP80X86FP80},
1378     {"llvm.rint.f32", Intrinsic::rint, makeDoubleDouble},
1379     {"llvm.rint.f64", Intrinsic::rint, makeX86FP80X86FP80},
1380     {"llvm.rint.f80", Intrinsic::rint, makeX86FP80X86FP80},
1381     {"llvm.nearbyint.f32", Intrinsic::nearbyint, makeDoubleDouble},
1382     {"llvm.nearbyint.f64", Intrinsic::nearbyint, makeX86FP80X86FP80},
1383     {"llvm.nearbyin80f64", Intrinsic::nearbyint, makeX86FP80X86FP80},
1384     {"llvm.round.f32", Intrinsic::round, makeDoubleDouble},
1385     {"llvm.round.f64", Intrinsic::round, makeX86FP80X86FP80},
1386     {"llvm.round.f80", Intrinsic::round, makeX86FP80X86FP80},
1387     {"llvm.lround.f32", Intrinsic::lround, makeDoubleDouble},
1388     {"llvm.lround.f64", Intrinsic::lround, makeX86FP80X86FP80},
1389     {"llvm.lround.f80", Intrinsic::lround, makeX86FP80X86FP80},
1390     {"llvm.llround.f32", Intrinsic::llround, makeDoubleDouble},
1391     {"llvm.llround.f64", Intrinsic::llround, makeX86FP80X86FP80},
1392     {"llvm.llround.f80", Intrinsic::llround, makeX86FP80X86FP80},
1393     {"llvm.lrint.f32", Intrinsic::lrint, makeDoubleDouble},
1394     {"llvm.lrint.f64", Intrinsic::lrint, makeX86FP80X86FP80},
1395     {"llvm.lrint.f80", Intrinsic::lrint, makeX86FP80X86FP80},
1396     {"llvm.llrint.f32", Intrinsic::llrint, makeDoubleDouble},
1397     {"llvm.llrint.f64", Intrinsic::llrint, makeX86FP80X86FP80},
1398     {"llvm.llrint.f80", Intrinsic::llrint, makeX86FP80X86FP80},
1399 };
1400 
1401 const KnownIntrinsic::LFEntry KnownIntrinsic::kLibfuncIntrinsics[] = {
1402     {LibFunc_sqrtf, "llvm.sqrt.f32"},
1403     {LibFunc_sqrt, "llvm.sqrt.f64"},
1404     {LibFunc_sqrtl, "llvm.sqrt.f80"},
1405     {LibFunc_sinf, "llvm.sin.f32"},
1406     {LibFunc_sin, "llvm.sin.f64"},
1407     {LibFunc_sinl, "llvm.sin.f80"},
1408     {LibFunc_cosf, "llvm.cos.f32"},
1409     {LibFunc_cos, "llvm.cos.f64"},
1410     {LibFunc_cosl, "llvm.cos.f80"},
1411     {LibFunc_powf, "llvm.pow.f32"},
1412     {LibFunc_pow, "llvm.pow.f64"},
1413     {LibFunc_powl, "llvm.pow.f80"},
1414     {LibFunc_expf, "llvm.exp.f32"},
1415     {LibFunc_exp, "llvm.exp.f64"},
1416     {LibFunc_expl, "llvm.exp.f80"},
1417     {LibFunc_exp2f, "llvm.exp2.f32"},
1418     {LibFunc_exp2, "llvm.exp2.f64"},
1419     {LibFunc_exp2l, "llvm.exp2.f80"},
1420     {LibFunc_logf, "llvm.log.f32"},
1421     {LibFunc_log, "llvm.log.f64"},
1422     {LibFunc_logl, "llvm.log.f80"},
1423     {LibFunc_log10f, "llvm.log10.f32"},
1424     {LibFunc_log10, "llvm.log10.f64"},
1425     {LibFunc_log10l, "llvm.log10.f80"},
1426     {LibFunc_log2f, "llvm.log2.f32"},
1427     {LibFunc_log2, "llvm.log2.f64"},
1428     {LibFunc_log2l, "llvm.log2.f80"},
1429     {LibFunc_fabsf, "llvm.fabs.f32"},
1430     {LibFunc_fabs, "llvm.fabs.f64"},
1431     {LibFunc_fabsl, "llvm.fabs.f80"},
1432     {LibFunc_copysignf, "llvm.copysign.f32"},
1433     {LibFunc_copysign, "llvm.copysign.f64"},
1434     {LibFunc_copysignl, "llvm.copysign.f80"},
1435     {LibFunc_floorf, "llvm.floor.f32"},
1436     {LibFunc_floor, "llvm.floor.f64"},
1437     {LibFunc_floorl, "llvm.floor.f80"},
1438     {LibFunc_fmaxf, "llvm.maxnum.f32"},
1439     {LibFunc_fmax, "llvm.maxnum.f64"},
1440     {LibFunc_fmaxl, "llvm.maxnum.f80"},
1441     {LibFunc_fminf, "llvm.minnum.f32"},
1442     {LibFunc_fmin, "llvm.minnum.f64"},
1443     {LibFunc_fminl, "llvm.minnum.f80"},
1444     {LibFunc_ceilf, "llvm.ceil.f32"},
1445     {LibFunc_ceil, "llvm.ceil.f64"},
1446     {LibFunc_ceill, "llvm.ceil.f80"},
1447     {LibFunc_truncf, "llvm.trunc.f32"},
1448     {LibFunc_trunc, "llvm.trunc.f64"},
1449     {LibFunc_truncl, "llvm.trunc.f80"},
1450     {LibFunc_rintf, "llvm.rint.f32"},
1451     {LibFunc_rint, "llvm.rint.f64"},
1452     {LibFunc_rintl, "llvm.rint.f80"},
1453     {LibFunc_nearbyintf, "llvm.nearbyint.f32"},
1454     {LibFunc_nearbyint, "llvm.nearbyint.f64"},
1455     {LibFunc_nearbyintl, "llvm.nearbyint.f80"},
1456     {LibFunc_roundf, "llvm.round.f32"},
1457     {LibFunc_round, "llvm.round.f64"},
1458     {LibFunc_roundl, "llvm.round.f80"},
1459 };
1460 
1461 const char *KnownIntrinsic::get(LibFunc LFunc) {
1462   for (const auto &E : kLibfuncIntrinsics) {
1463     if (E.LFunc == LFunc)
1464       return E.IntrinsicName;
1465   }
1466   return nullptr;
1467 }
1468 
1469 const KnownIntrinsic::WidenedIntrinsic *KnownIntrinsic::widen(StringRef Name) {
1470   for (const auto &E : kWidenedIntrinsics) {
1471     if (E.NarrowName == Name)
1472       return &E;
1473   }
1474   return nullptr;
1475 }
1476 
1477 // Returns the name of the LLVM intrinsic corresponding to the given function.
1478 static const char *getIntrinsicFromLibfunc(Function &Fn, Type *VT,
1479                                            const TargetLibraryInfo &TLI) {
1480   LibFunc LFunc;
1481   if (!TLI.getLibFunc(Fn, LFunc))
1482     return nullptr;
1483 
1484   if (const char *Name = KnownIntrinsic::get(LFunc))
1485     return Name;
1486 
1487   LLVM_DEBUG(errs() << "TODO: LibFunc: " << TLI.getName(LFunc) << "\n");
1488   return nullptr;
1489 }
1490 
1491 // Try to handle a known function call.
1492 Value *NumericalStabilitySanitizer::maybeHandleKnownCallBase(
1493     CallBase &Call, Type *VT, Type *ExtendedVT, const TargetLibraryInfo &TLI,
1494     const ValueToShadowMap &Map, IRBuilder<> &Builder) {
1495   Function *Fn = Call.getCalledFunction();
1496   if (Fn == nullptr)
1497     return nullptr;
1498 
1499   Intrinsic::ID WidenedId = Intrinsic::ID();
1500   FunctionType *WidenedFnTy = nullptr;
1501   if (const auto ID = Fn->getIntrinsicID()) {
1502     const auto *Widened = KnownIntrinsic::widen(Fn->getName());
1503     if (Widened) {
1504       WidenedId = Widened->ID;
1505       WidenedFnTy = Widened->MakeFnTy(Context);
1506     } else {
1507       // If we don't know how to widen the intrinsic, we have no choice but to
1508       // call the non-wide version on a truncated shadow and extend again
1509       // afterwards.
1510       WidenedId = ID;
1511       WidenedFnTy = Fn->getFunctionType();
1512     }
1513   } else if (const char *Name = getIntrinsicFromLibfunc(*Fn, VT, TLI)) {
1514     // We might have a call to a library function that we can replace with a
1515     // wider Intrinsic.
1516     const auto *Widened = KnownIntrinsic::widen(Name);
1517     assert(Widened && "make sure KnownIntrinsic entries are consistent");
1518     WidenedId = Widened->ID;
1519     WidenedFnTy = Widened->MakeFnTy(Context);
1520   } else {
1521     // This is not a known library function or intrinsic.
1522     return nullptr;
1523   }
1524 
1525   // Check that the widened intrinsic is valid.
1526   SmallVector<Intrinsic::IITDescriptor, 8> Table;
1527   getIntrinsicInfoTableEntries(WidenedId, Table);
1528   SmallVector<Type *, 4> ArgTys;
1529   ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
1530   [[maybe_unused]] Intrinsic::MatchIntrinsicTypesResult MatchResult =
1531       Intrinsic::matchIntrinsicSignature(WidenedFnTy, TableRef, ArgTys);
1532   assert(MatchResult == Intrinsic::MatchIntrinsicTypes_Match &&
1533          "invalid widened intrinsic");
1534   // For known intrinsic functions, we create a second call to the same
1535   // intrinsic with a different type.
1536   SmallVector<Value *, 4> Args;
1537   // The last operand is the intrinsic itself, skip it.
1538   for (unsigned I = 0, E = Call.getNumOperands() - 1; I < E; ++I) {
1539     Value *Arg = Call.getOperand(I);
1540     Type *OrigArgTy = Arg->getType();
1541     Type *IntrinsicArgTy = WidenedFnTy->getParamType(I);
1542     if (OrigArgTy == IntrinsicArgTy) {
1543       Args.push_back(Arg); // The arg is passed as is.
1544       continue;
1545     }
1546     Type *ShadowArgTy = Config.getExtendedFPType(Arg->getType());
1547     assert(ShadowArgTy &&
1548            "don't know how to get the shadow value for a non-FT");
1549     Value *Shadow = Map.getShadow(Arg);
1550     if (ShadowArgTy == IntrinsicArgTy) {
1551       // The shadow is the right type for the intrinsic.
1552       assert(Shadow->getType() == ShadowArgTy);
1553       Args.push_back(Shadow);
1554       continue;
1555     }
1556     // There is no intrinsic with his level of precision, truncate the shadow.
1557     Args.push_back(Builder.CreateFPTrunc(Shadow, IntrinsicArgTy));
1558   }
1559   Value *IntrinsicCall = Builder.CreateIntrinsic(WidenedId, ArgTys, Args);
1560   return WidenedFnTy->getReturnType() == ExtendedVT
1561              ? IntrinsicCall
1562              : Builder.CreateFPExt(IntrinsicCall, ExtendedVT);
1563 }
1564 
1565 // Handle a CallBase, i.e. a function call, an inline asm sequence, or an
1566 // invoke.
1567 Value *NumericalStabilitySanitizer::handleCallBase(CallBase &Call, Type *VT,
1568                                                    Type *ExtendedVT,
1569                                                    const TargetLibraryInfo &TLI,
1570                                                    const ValueToShadowMap &Map,
1571                                                    IRBuilder<> &Builder) {
1572   // We cannot look inside inline asm, just expand the result again.
1573   if (Call.isInlineAsm())
1574     return Builder.CreateFPExt(&Call, ExtendedVT);
1575 
1576   // Intrinsics and library functions (e.g. sin, exp) are handled
1577   // specifically, because we know their semantics and can do better than
1578   // blindly calling them (e.g. compute the sinus in the actual shadow domain).
1579   if (Value *V =
1580           maybeHandleKnownCallBase(Call, VT, ExtendedVT, TLI, Map, Builder))
1581     return V;
1582 
1583   // If the return tag matches that of the called function, read the extended
1584   // return value from the shadow ret ptr. Else, just extend the return value.
1585   Value *L =
1586       Builder.CreateLoad(IntptrTy, NsanShadowRetTag, /*isVolatile=*/false);
1587   Value *HasShadowRet = Builder.CreateICmpEQ(
1588       L, Builder.CreatePtrToInt(Call.getCalledOperand(), IntptrTy));
1589 
1590   Value *ShadowRetVal = Builder.CreateLoad(
1591       ExtendedVT,
1592       Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0),
1593       /*isVolatile=*/false);
1594   Value *Shadow = Builder.CreateSelect(HasShadowRet, ShadowRetVal,
1595                                        Builder.CreateFPExt(&Call, ExtendedVT));
1596   ++NumInstrumentedFTCalls;
1597   return Shadow;
1598 }
1599 
1600 // Creates a shadow value for the given FT value. At that point all operands are
1601 // guaranteed to be available.
1602 Value *NumericalStabilitySanitizer::createShadowValueWithOperandsAvailable(
1603     Instruction &Inst, const TargetLibraryInfo &TLI,
1604     const ValueToShadowMap &Map) {
1605   Type *VT = Inst.getType();
1606   Type *ExtendedVT = Config.getExtendedFPType(VT);
1607   assert(ExtendedVT != nullptr && "trying to create a shadow for a non-FT");
1608 
1609   if (auto *Load = dyn_cast<LoadInst>(&Inst))
1610     return handleLoad(*Load, VT, ExtendedVT);
1611 
1612   if (auto *Call = dyn_cast<CallInst>(&Inst)) {
1613     // Insert after the call.
1614     BasicBlock::iterator It(Inst);
1615     IRBuilder<> Builder(Call->getParent(), ++It);
1616     Builder.SetCurrentDebugLocation(Call->getDebugLoc());
1617     return handleCallBase(*Call, VT, ExtendedVT, TLI, Map, Builder);
1618   }
1619 
1620   if (auto *Invoke = dyn_cast<InvokeInst>(&Inst)) {
1621     // The Invoke terminates the basic block, create a new basic block in
1622     // between the successful invoke and the next block.
1623     BasicBlock *InvokeBB = Invoke->getParent();
1624     BasicBlock *NextBB = Invoke->getNormalDest();
1625     BasicBlock *NewBB =
1626         BasicBlock::Create(Context, "", NextBB->getParent(), NextBB);
1627     Inst.replaceSuccessorWith(NextBB, NewBB);
1628 
1629     IRBuilder<> Builder(NewBB);
1630     Builder.SetCurrentDebugLocation(Invoke->getDebugLoc());
1631     Value *Shadow = handleCallBase(*Invoke, VT, ExtendedVT, TLI, Map, Builder);
1632     Builder.CreateBr(NextBB);
1633     NewBB->replaceSuccessorsPhiUsesWith(InvokeBB, NewBB);
1634     return Shadow;
1635   }
1636 
1637   IRBuilder<> Builder(Inst.getNextNode());
1638   Builder.SetCurrentDebugLocation(Inst.getDebugLoc());
1639 
1640   if (auto *Trunc = dyn_cast<FPTruncInst>(&Inst))
1641     return handleTrunc(*Trunc, VT, ExtendedVT, Map, Builder);
1642   if (auto *Ext = dyn_cast<FPExtInst>(&Inst))
1643     return handleExt(*Ext, VT, ExtendedVT, Map, Builder);
1644 
1645   if (auto *UnaryOp = dyn_cast<UnaryOperator>(&Inst))
1646     return Builder.CreateUnOp(UnaryOp->getOpcode(),
1647                               Map.getShadow(UnaryOp->getOperand(0)));
1648 
1649   if (auto *BinOp = dyn_cast<BinaryOperator>(&Inst))
1650     return Builder.CreateBinOp(BinOp->getOpcode(),
1651                                Map.getShadow(BinOp->getOperand(0)),
1652                                Map.getShadow(BinOp->getOperand(1)));
1653 
1654   if (isa<UIToFPInst>(&Inst) || isa<SIToFPInst>(&Inst)) {
1655     auto *Cast = dyn_cast<CastInst>(&Inst);
1656     return Builder.CreateCast(Cast->getOpcode(), Cast->getOperand(0),
1657                               ExtendedVT);
1658   }
1659 
1660   if (auto *S = dyn_cast<SelectInst>(&Inst))
1661     return Builder.CreateSelect(S->getCondition(),
1662                                 Map.getShadow(S->getTrueValue()),
1663                                 Map.getShadow(S->getFalseValue()));
1664 
1665   if (auto *Extract = dyn_cast<ExtractElementInst>(&Inst))
1666     return Builder.CreateExtractElement(
1667         Map.getShadow(Extract->getVectorOperand()), Extract->getIndexOperand());
1668 
1669   if (auto *Insert = dyn_cast<InsertElementInst>(&Inst))
1670     return Builder.CreateInsertElement(Map.getShadow(Insert->getOperand(0)),
1671                                        Map.getShadow(Insert->getOperand(1)),
1672                                        Insert->getOperand(2));
1673 
1674   if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(&Inst))
1675     return Builder.CreateShuffleVector(Map.getShadow(Shuffle->getOperand(0)),
1676                                        Map.getShadow(Shuffle->getOperand(1)),
1677                                        Shuffle->getShuffleMask());
1678   // TODO: We could make aggregate object first class citizens. For now we
1679   // just extend the extracted value.
1680   if (auto *Extract = dyn_cast<ExtractValueInst>(&Inst))
1681     return Builder.CreateFPExt(Extract, ExtendedVT);
1682 
1683   if (auto *BC = dyn_cast<BitCastInst>(&Inst))
1684     return Builder.CreateFPExt(BC, ExtendedVT);
1685 
1686   report_fatal_error("Unimplemented support for " +
1687                      Twine(Inst.getOpcodeName()));
1688 }
1689 
1690 // Creates a shadow value for an instruction that defines a value of FT type.
1691 // FT operands that do not already have shadow values are created recursively.
1692 // The DFS is guaranteed to not loop as phis and arguments already have
1693 // shadows.
1694 void NumericalStabilitySanitizer::maybeCreateShadowValue(
1695     Instruction &Root, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
1696   Type *VT = Root.getType();
1697   Type *ExtendedVT = Config.getExtendedFPType(VT);
1698   if (ExtendedVT == nullptr)
1699     return; // Not an FT value.
1700 
1701   if (Map.hasShadow(&Root))
1702     return; // Shadow already exists.
1703 
1704   assert(!isa<PHINode>(Root) && "phi nodes should already have shadows");
1705 
1706   std::vector<Instruction *> DfsStack(1, &Root);
1707   while (!DfsStack.empty()) {
1708     // Ensure that all operands to the instruction have shadows before
1709     // proceeding.
1710     Instruction *I = DfsStack.back();
1711     // The shadow for the instruction might have been created deeper in the DFS,
1712     // see `forward_use_with_two_uses` test.
1713     if (Map.hasShadow(I)) {
1714       DfsStack.pop_back();
1715       continue;
1716     }
1717 
1718     bool MissingShadow = false;
1719     for (Value *Op : I->operands()) {
1720       Type *VT = Op->getType();
1721       if (!Config.getExtendedFPType(VT))
1722         continue; // Not an FT value.
1723       if (Map.hasShadow(Op))
1724         continue; // Shadow is already available.
1725       MissingShadow = true;
1726       DfsStack.push_back(cast<Instruction>(Op));
1727     }
1728     if (MissingShadow)
1729       continue; // Process operands and come back to this instruction later.
1730 
1731     // All operands have shadows. Create a shadow for the current value.
1732     Value *Shadow = createShadowValueWithOperandsAvailable(*I, TLI, Map);
1733     Map.setShadow(*I, *Shadow);
1734     DfsStack.pop_back();
1735   }
1736 }
1737 
1738 // A floating-point store needs its value and type written to shadow memory.
1739 void NumericalStabilitySanitizer::propagateFTStore(
1740     StoreInst &Store, Type *VT, Type *ExtendedVT, const ValueToShadowMap &Map) {
1741   Value *StoredValue = Store.getValueOperand();
1742   IRBuilder<> Builder(&Store);
1743   Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1744   const auto Extents = getMemoryExtentsOrDie(VT);
1745   Value *ShadowPtr = Builder.CreateCall(
1746       NsanGetShadowPtrForStore[Extents.ValueType],
1747       {Store.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
1748 
1749   Value *StoredShadow = Map.getShadow(StoredValue);
1750   if (!Store.getParent()->getParent()->hasOptNone()) {
1751     // Only check stores when optimizing, because non-optimized code generates
1752     // too many stores to the stack, creating false positives.
1753     if (ClCheckStores) {
1754       StoredShadow = emitCheck(StoredValue, StoredShadow, Builder,
1755                                CheckLoc::makeStore(Store.getPointerOperand()));
1756       ++NumInstrumentedFTStores;
1757     }
1758   }
1759 
1760   Builder.CreateAlignedStore(StoredShadow, ShadowPtr, Align(1),
1761                              Store.isVolatile());
1762 }
1763 
1764 // A non-ft store needs to invalidate shadow memory. Exceptions are:
1765 //   - memory transfers of floating-point data through other pointer types (llvm
1766 //     optimization passes transform `*(float*)a = *(float*)b` into
1767 //     `*(i32*)a = *(i32*)b` ). These have the same semantics as memcpy.
1768 //   - Writes of FT-sized constants. LLVM likes to do float stores as bitcasted
1769 //     ints. Note that this is not really necessary because if the value is
1770 //     unknown the framework will re-extend it on load anyway. It just felt
1771 //     easier to debug tests with vectors of FTs.
1772 void NumericalStabilitySanitizer::propagateNonFTStore(
1773     StoreInst &Store, Type *VT, const ValueToShadowMap &Map) {
1774   Value *PtrOp = Store.getPointerOperand();
1775   IRBuilder<> Builder(Store.getNextNode());
1776   Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1777   Value *Dst = PtrOp;
1778   TypeSize SlotSize = DL.getTypeStoreSize(VT);
1779   assert(!SlotSize.isScalable() && "unsupported");
1780   const auto LoadSizeBytes = SlotSize.getFixedValue();
1781   Value *ValueSize = Constant::getIntegerValue(
1782       IntptrTy, APInt(IntptrTy->getPrimitiveSizeInBits(), LoadSizeBytes));
1783 
1784   ++NumInstrumentedNonFTStores;
1785   Value *StoredValue = Store.getValueOperand();
1786   if (LoadInst *Load = dyn_cast<LoadInst>(StoredValue)) {
1787     // TODO: Handle the case when the value is from a phi.
1788     // This is a memory transfer with memcpy semantics. Copy the type and
1789     // value from the source. Note that we cannot use __nsan_copy_values()
1790     // here, because that will not work when there is a write to memory in
1791     // between the load and the store, e.g. in the case of a swap.
1792     Type *ShadowTypeIntTy = Type::getIntNTy(Context, 8 * LoadSizeBytes);
1793     Type *ShadowValueIntTy =
1794         Type::getIntNTy(Context, 8 * kShadowScale * LoadSizeBytes);
1795     IRBuilder<> LoadBuilder(Load->getNextNode());
1796     Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1797     Value *LoadSrc = Load->getPointerOperand();
1798     // Read the shadow type and value at load time. The type has the same size
1799     // as the FT value, the value has twice its size.
1800     // TODO: cache them to avoid re-creating them when a load is used by
1801     // several stores. Maybe create them like the FT shadows when a load is
1802     // encountered.
1803     Value *RawShadowType = LoadBuilder.CreateAlignedLoad(
1804         ShadowTypeIntTy,
1805         LoadBuilder.CreateCall(NsanGetRawShadowTypePtr, {LoadSrc}), Align(1),
1806         /*isVolatile=*/false);
1807     Value *RawShadowValue = LoadBuilder.CreateAlignedLoad(
1808         ShadowValueIntTy,
1809         LoadBuilder.CreateCall(NsanGetRawShadowPtr, {LoadSrc}), Align(1),
1810         /*isVolatile=*/false);
1811 
1812     // Write back the shadow type and value at store time.
1813     Builder.CreateAlignedStore(
1814         RawShadowType, Builder.CreateCall(NsanGetRawShadowTypePtr, {Dst}),
1815         Align(1),
1816         /*isVolatile=*/false);
1817     Builder.CreateAlignedStore(RawShadowValue,
1818                                Builder.CreateCall(NsanGetRawShadowPtr, {Dst}),
1819                                Align(1),
1820                                /*isVolatile=*/false);
1821 
1822     ++NumInstrumentedNonFTMemcpyStores;
1823     return;
1824   }
1825   // ClPropagateNonFTConstStoresAsFT is by default false.
1826   if (Constant *C; ClPropagateNonFTConstStoresAsFT &&
1827                    (C = dyn_cast<Constant>(StoredValue))) {
1828     // This might be a fp constant stored as an int. Bitcast and store if it has
1829     // appropriate size.
1830     Type *BitcastTy = nullptr; // The FT type to bitcast to.
1831     if (auto *CInt = dyn_cast<ConstantInt>(C)) {
1832       switch (CInt->getType()->getScalarSizeInBits()) {
1833       case 32:
1834         BitcastTy = Type::getFloatTy(Context);
1835         break;
1836       case 64:
1837         BitcastTy = Type::getDoubleTy(Context);
1838         break;
1839       case 80:
1840         BitcastTy = Type::getX86_FP80Ty(Context);
1841         break;
1842       default:
1843         break;
1844       }
1845     } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
1846       const int NumElements =
1847           cast<VectorType>(CDV->getType())->getElementCount().getFixedValue();
1848       switch (CDV->getType()->getScalarSizeInBits()) {
1849       case 32:
1850         BitcastTy =
1851             VectorType::get(Type::getFloatTy(Context), NumElements, false);
1852         break;
1853       case 64:
1854         BitcastTy =
1855             VectorType::get(Type::getDoubleTy(Context), NumElements, false);
1856         break;
1857       case 80:
1858         BitcastTy =
1859             VectorType::get(Type::getX86_FP80Ty(Context), NumElements, false);
1860         break;
1861       default:
1862         break;
1863       }
1864     }
1865     if (BitcastTy) {
1866       const MemoryExtents Extents = getMemoryExtentsOrDie(BitcastTy);
1867       Value *ShadowPtr = Builder.CreateCall(
1868           NsanGetShadowPtrForStore[Extents.ValueType],
1869           {PtrOp, ConstantInt::get(IntptrTy, Extents.NumElts)});
1870       // Bitcast the integer value to the appropriate FT type and extend to 2FT.
1871       Type *ExtVT = Config.getExtendedFPType(BitcastTy);
1872       Value *Shadow =
1873           Builder.CreateFPExt(Builder.CreateBitCast(C, BitcastTy), ExtVT);
1874       Builder.CreateAlignedStore(Shadow, ShadowPtr, Align(1),
1875                                  Store.isVolatile());
1876       return;
1877     }
1878   }
1879   // All other stores just reset the shadow value to unknown.
1880   Builder.CreateCall(NsanSetValueUnknown, {Dst, ValueSize});
1881 }
1882 
1883 void NumericalStabilitySanitizer::propagateShadowValues(
1884     Instruction &Inst, const TargetLibraryInfo &TLI,
1885     const ValueToShadowMap &Map) {
1886   if (auto *Store = dyn_cast<StoreInst>(&Inst)) {
1887     Value *StoredValue = Store->getValueOperand();
1888     Type *VT = StoredValue->getType();
1889     Type *ExtendedVT = Config.getExtendedFPType(VT);
1890     if (ExtendedVT == nullptr)
1891       return propagateNonFTStore(*Store, VT, Map);
1892     return propagateFTStore(*Store, VT, ExtendedVT, Map);
1893   }
1894 
1895   if (auto *FCmp = dyn_cast<FCmpInst>(&Inst)) {
1896     emitFCmpCheck(*FCmp, Map);
1897     return;
1898   }
1899 
1900   if (auto *CB = dyn_cast<CallBase>(&Inst)) {
1901     maybeAddSuffixForNsanInterface(CB);
1902     if (CallInst *CI = dyn_cast<CallInst>(&Inst))
1903       maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
1904     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) {
1905       instrumentMemIntrinsic(MI);
1906       return;
1907     }
1908     populateShadowStack(*CB, TLI, Map);
1909     return;
1910   }
1911 
1912   if (auto *RetInst = dyn_cast<ReturnInst>(&Inst)) {
1913     if (!ClCheckRet)
1914       return;
1915 
1916     Value *RV = RetInst->getReturnValue();
1917     if (RV == nullptr)
1918       return; // This is a `ret void`.
1919     Type *VT = RV->getType();
1920     Type *ExtendedVT = Config.getExtendedFPType(VT);
1921     if (ExtendedVT == nullptr)
1922       return; // Not an FT ret.
1923     Value *RVShadow = Map.getShadow(RV);
1924     IRBuilder<> Builder(RetInst);
1925 
1926     RVShadow = emitCheck(RV, RVShadow, Builder, CheckLoc::makeRet());
1927     ++NumInstrumentedFTRets;
1928     // Store tag.
1929     Value *FnAddr =
1930         Builder.CreatePtrToInt(Inst.getParent()->getParent(), IntptrTy);
1931     Builder.CreateStore(FnAddr, NsanShadowRetTag);
1932     // Store value.
1933     Value *ShadowRetValPtr =
1934         Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0);
1935     Builder.CreateStore(RVShadow, ShadowRetValPtr);
1936     return;
1937   }
1938 
1939   if (InsertValueInst *Insert = dyn_cast<InsertValueInst>(&Inst)) {
1940     Value *V = Insert->getOperand(1);
1941     Type *VT = V->getType();
1942     Type *ExtendedVT = Config.getExtendedFPType(VT);
1943     if (ExtendedVT == nullptr)
1944       return;
1945     IRBuilder<> Builder(Insert);
1946     emitCheck(V, Map.getShadow(V), Builder, CheckLoc::makeInsert());
1947     return;
1948   }
1949 }
1950 
1951 // Moves fast math flags from the function to individual instructions, and
1952 // removes the attribute from the function.
1953 // TODO: Make this controllable with a flag.
1954 static void moveFastMathFlags(Function &F,
1955                               std::vector<Instruction *> &Instructions) {
1956   FastMathFlags FMF;
1957 #define MOVE_FLAG(attr, setter)                                                \
1958   if (F.getFnAttribute(attr).getValueAsString() == "true") {                   \
1959     F.removeFnAttr(attr);                                                      \
1960     FMF.set##setter();                                                         \
1961   }
1962   MOVE_FLAG("unsafe-fp-math", Fast)
1963   MOVE_FLAG("no-infs-fp-math", NoInfs)
1964   MOVE_FLAG("no-nans-fp-math", NoNaNs)
1965   MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
1966 #undef MOVE_FLAG
1967 
1968   for (Instruction *I : Instructions)
1969     if (isa<FPMathOperator>(I))
1970       I->setFastMathFlags(FMF);
1971 }
1972 
1973 bool NumericalStabilitySanitizer::sanitizeFunction(
1974     Function &F, const TargetLibraryInfo &TLI) {
1975   if (!F.hasFnAttribute(Attribute::SanitizeNumericalStability))
1976     return false;
1977 
1978   // This is required to prevent instrumenting call to __nsan_init from within
1979   // the module constructor.
1980   if (F.getName() == kNsanModuleCtorName)
1981     return false;
1982   SmallVector<Instruction *, 8> AllLoadsAndStores;
1983   SmallVector<Instruction *, 8> LocalLoadsAndStores;
1984 
1985   // The instrumentation maintains:
1986   //  - for each IR value `v` of floating-point (or vector floating-point) type
1987   //    FT, a shadow IR value `s(v)` with twice the precision 2FT (e.g.
1988   //    double for float and f128 for double).
1989   //  - A shadow memory, which stores `s(v)` for any `v` that has been stored,
1990   //    along with a shadow memory tag, which stores whether the value in the
1991   //    corresponding shadow memory is valid. Note that this might be
1992   //    incorrect if a non-instrumented function stores to memory, or if
1993   //    memory is stored to through a char pointer.
1994   //  - A shadow stack, which holds `s(v)` for any floating-point argument `v`
1995   //    of a call to an instrumented function. This allows
1996   //    instrumented functions to retrieve the shadow values for their
1997   //    arguments.
1998   //    Because instrumented functions can be called from non-instrumented
1999   //    functions, the stack needs to include a tag so that the instrumented
2000   //    function knows whether shadow values are available for their
2001   //    parameters (i.e. whether is was called by an instrumented function).
2002   //    When shadow arguments are not available, they have to be recreated by
2003   //    extending the precision of the non-shadow arguments to the non-shadow
2004   //    value. Non-instrumented functions do not modify (or even know about) the
2005   //    shadow stack. The shadow stack pointer is __nsan_shadow_args. The shadow
2006   //    stack tag is __nsan_shadow_args_tag. The tag is any unique identifier
2007   //    for the function (we use the address of the function). Both variables
2008   //    are thread local.
2009   //    Example:
2010   //     calls                             shadow stack tag      shadow stack
2011   //     =======================================================================
2012   //     non_instrumented_1()              0                     0
2013   //             |
2014   //             v
2015   //     instrumented_2(float a)           0                     0
2016   //             |
2017   //             v
2018   //     instrumented_3(float b, double c) &instrumented_3       s(b),s(c)
2019   //             |
2020   //             v
2021   //     instrumented_4(float d)           &instrumented_4       s(d)
2022   //             |
2023   //             v
2024   //     non_instrumented_5(float e)       &non_instrumented_5   s(e)
2025   //             |
2026   //             v
2027   //     instrumented_6(float f)           &non_instrumented_5   s(e)
2028   //
2029   //   On entry, instrumented_2 checks whether the tag corresponds to its
2030   //   function ptr.
2031   //   Note that functions reset the tag to 0 after reading shadow parameters.
2032   //   This ensures that the function does not erroneously read invalid data if
2033   //   called twice in the same stack, once from an instrumented function and
2034   //   once from an uninstrumented one. For example, in the following example,
2035   //   resetting the tag in (A) ensures that (B) does not reuse the same the
2036   //   shadow arguments (which would be incorrect).
2037   //      instrumented_1(float a)
2038   //             |
2039   //             v
2040   //      instrumented_2(float b)  (A)
2041   //             |
2042   //             v
2043   //      non_instrumented_3()
2044   //             |
2045   //             v
2046   //      instrumented_2(float b)  (B)
2047   //
2048   //  - A shadow return slot. Any function that returns a floating-point value
2049   //    places a shadow return value in __nsan_shadow_ret_val. Again, because
2050   //    we might be calling non-instrumented functions, this value is guarded
2051   //    by __nsan_shadow_ret_tag marker indicating which instrumented function
2052   //    placed the value in __nsan_shadow_ret_val, so that the caller can check
2053   //    that this corresponds to the callee. Both variables are thread local.
2054   //
2055   //    For example, in the following example, the instrumentation in
2056   //    `instrumented_1` rejects the shadow return value from `instrumented_3`
2057   //    because is is not tagged as expected (`&instrumented_3` instead of
2058   //    `non_instrumented_2`):
2059   //
2060   //        instrumented_1()
2061   //            |
2062   //            v
2063   //        float non_instrumented_2()
2064   //            |
2065   //            v
2066   //        float instrumented_3()
2067   //
2068   // Calls of known math functions (sin, cos, exp, ...) are duplicated to call
2069   // their overload on the shadow type.
2070 
2071   // Collect all instructions before processing, as creating shadow values
2072   // creates new instructions inside the function.
2073   std::vector<Instruction *> OriginalInstructions;
2074   for (BasicBlock &BB : F)
2075     for (Instruction &Inst : BB)
2076       OriginalInstructions.emplace_back(&Inst);
2077 
2078   moveFastMathFlags(F, OriginalInstructions);
2079   ValueToShadowMap ValueToShadow(Config);
2080 
2081   // In the first pass, we create shadow values for all FT function arguments
2082   // and all phis. This ensures that the DFS of the next pass does not have
2083   // any loops.
2084   std::vector<PHINode *> OriginalPhis;
2085   createShadowArguments(F, TLI, ValueToShadow);
2086   for (Instruction *I : OriginalInstructions) {
2087     if (PHINode *Phi = dyn_cast<PHINode>(I)) {
2088       if (PHINode *Shadow = maybeCreateShadowPhi(*Phi, TLI)) {
2089         OriginalPhis.push_back(Phi);
2090         ValueToShadow.setShadow(*Phi, *Shadow);
2091       }
2092     }
2093   }
2094 
2095   // Create shadow values for all instructions creating FT values.
2096   for (Instruction *I : OriginalInstructions)
2097     maybeCreateShadowValue(*I, TLI, ValueToShadow);
2098 
2099   // Propagate shadow values across stores, calls and rets.
2100   for (Instruction *I : OriginalInstructions)
2101     propagateShadowValues(*I, TLI, ValueToShadow);
2102 
2103   // The last pass populates shadow phis with shadow values.
2104   for (PHINode *Phi : OriginalPhis) {
2105     PHINode *ShadowPhi = dyn_cast<PHINode>(ValueToShadow.getShadow(Phi));
2106     for (unsigned I : seq(Phi->getNumOperands())) {
2107       Value *V = Phi->getOperand(I);
2108       Value *Shadow = ValueToShadow.getShadow(V);
2109       BasicBlock *IncomingBB = Phi->getIncomingBlock(I);
2110       // For some instructions (e.g. invoke), we create the shadow in a separate
2111       // block, different from the block where the original value is created.
2112       // In that case, the shadow phi might need to refer to this block instead
2113       // of the original block.
2114       // Note that this can only happen for instructions as constant shadows are
2115       // always created in the same block.
2116       ShadowPhi->addIncoming(Shadow, IncomingBB);
2117     }
2118   }
2119 
2120   return !ValueToShadow.empty();
2121 }
2122 
2123 // Instrument the memory intrinsics so that they properly modify the shadow
2124 // memory.
2125 bool NumericalStabilitySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
2126   IRBuilder<> Builder(MI);
2127   if (auto *M = dyn_cast<MemSetInst>(MI)) {
2128     Builder.CreateCall(
2129         NsanSetValueUnknown,
2130         {/*Address=*/M->getArgOperand(0),
2131          /*Size=*/Builder.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
2132   } else if (auto *M = dyn_cast<MemTransferInst>(MI)) {
2133     Builder.CreateCall(
2134         NsanCopyValues,
2135         {/*Destination=*/M->getArgOperand(0),
2136          /*Source=*/M->getArgOperand(1),
2137          /*Size=*/Builder.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
2138   }
2139   return false;
2140 }
2141 
2142 void NumericalStabilitySanitizer::maybeAddSuffixForNsanInterface(CallBase *CI) {
2143   Function *Fn = CI->getCalledFunction();
2144   if (Fn == nullptr)
2145     return;
2146 
2147   if (!Fn->getName().starts_with("__nsan_"))
2148     return;
2149 
2150   if (Fn->getName() == "__nsan_dump_shadow_mem") {
2151     assert(CI->arg_size() == 4 &&
2152            "invalid prototype for __nsan_dump_shadow_mem");
2153     // __nsan_dump_shadow_mem requires an extra parameter with the dynamic
2154     // configuration:
2155     // (shadow_type_id_for_long_double << 16) | (shadow_type_id_for_double << 8)
2156     // | shadow_type_id_for_double
2157     const uint64_t shadow_value_type_ids =
2158         (static_cast<size_t>(Config.byValueType(kLongDouble).getNsanTypeId())
2159          << 16) |
2160         (static_cast<size_t>(Config.byValueType(kDouble).getNsanTypeId())
2161          << 8) |
2162         static_cast<size_t>(Config.byValueType(kFloat).getNsanTypeId());
2163     CI->setArgOperand(3, ConstantInt::get(IntptrTy, shadow_value_type_ids));
2164   }
2165 }
2166