xref: /llvm-project/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp (revision d23c24f336674727d281258157fc5b15ce9040a4)
1 //===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the instrumentation pass for the numerical sanitizer.
10 // Conceptually the pass injects shadow computations using higher precision
11 // types and inserts consistency checks. For details see the paper
12 // https://arxiv.org/abs/2102.12782.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
17 
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/Analysis/ValueTracking.h"
25 #include "llvm/IR/DataLayout.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include "llvm/IR/Metadata.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/InitializePasses.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/Regex.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Transforms/Instrumentation.h"
42 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
43 #include "llvm/Transforms/Utils/EscapeEnumerator.h"
44 #include "llvm/Transforms/Utils/Local.h"
45 #include "llvm/Transforms/Utils/ModuleUtils.h"
46 
47 #include <cstdint>
48 
49 using namespace llvm;
50 
51 #define DEBUG_TYPE "nsan"
52 
// Pass statistics, registered under the DEBUG_TYPE defined above. Each counter
// names one kind of instrumented site (loads, calls, returns, stores, fcmps);
// the code that increments them is outside this part of the file.
STATISTIC(NumInstrumentedFTLoads,
          "Number of instrumented floating-point loads");

STATISTIC(NumInstrumentedFTCalls,
          "Number of instrumented floating-point calls");
STATISTIC(NumInstrumentedFTRets,
          "Number of instrumented floating-point returns");
STATISTIC(NumInstrumentedFTStores,
          "Number of instrumented floating-point stores");
STATISTIC(NumInstrumentedNonFTStores,
          "Number of instrumented non floating-point stores");
STATISTIC(
    NumInstrumentedNonFTMemcpyStores,
    "Number of instrumented non floating-point stores with memcpy semantics");
STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");
68 
// Selects the shadow type used for each application type. The value is a
// three-character string: one shadow type id each for `float`, `double` and
// `long double`, in that order (validated by MappingConfig below).
// Using smaller shadow types can help improve speed. For example, `dlq`
// is 3x slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to
// `dqq`.
static cl::opt<std::string> ClShadowMapping(
    "nsan-shadow-type-mapping", cl::init("dqq"),
    cl::desc("One shadow type id for each of `float`, `double`, `long double`. "
             "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
             "ppc_fp128 (extended double) respectively. The default is to "
             "shadow `float` as `double`, and `double` and `x86_fp80` as "
             "`fp128`"),
    cl::Hidden);

// Toggles instrumentation of floating-point comparisons (on by default).
static cl::opt<bool>
    ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(true),
                     cl::desc("Instrument floating-point comparisons"),
                     cl::Hidden);

// Optional regular expression restricting which callees get argument checks.
// When non-empty it is compiled into
// NumericalStabilitySanitizer::CheckFunctionsFilter by the constructor.
// NOTE(review): unlike the other options here, this one has no `nsan-` prefix
// and is not cl::Hidden -- confirm whether that is intentional.
static cl::opt<std::string> ClCheckFunctionsFilter(
    "check-functions-filter",
    cl::desc("Only emit checks for arguments of functions "
             "whose names match the given regular expression"),
    cl::value_desc("regex"));
91 
92 static cl::opt<bool> ClTruncateFCmpEq(
93     "nsan-truncate-fcmp-eq", cl::init(true),
94     cl::desc(
95         "This flag controls the behaviour of fcmp equality comparisons."
96         "For equality comparisons such as `x == 0.0f`, we can perform the "
97         "shadow check in the shadow (`x_shadow == 0.0) == (x == 0.0f)`) or app "
98         " domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). This helps "
99         "catch the case when `x_shadow` is accurate enough (and therefore "
100         "close enough to zero) so that `trunc(x_shadow)` is zero even though "
101         "both `x` and `x_shadow` are not"),
102     cl::Hidden);
103 
// When there is external, uninstrumented code writing to memory, the shadow
// memory can get out of sync with the application memory. Enabling this flag
// emits consistency checks for loads to catch this situation.
// When everything is instrumented, this is not strictly necessary because any
// load should have a corresponding store, but can help debug cases when the
// framework did a bad job at tracking shadow memory modifications by failing on
// load rather than store.
// No cl::init is given here, unlike the store/return options below, which
// are explicitly initialized to true.
// TODO: provide a way to resume computations from the FT value when the load
// is inconsistent. This ensures that further computations are not polluted.
static cl::opt<bool> ClCheckLoads("nsan-check-loads",
                                  cl::desc("Check floating-point load"),
                                  cl::Hidden);

// Emit consistency checks when floating-point values are stored (default on).
static cl::opt<bool> ClCheckStores("nsan-check-stores", cl::init(true),
                                   cl::desc("Check floating-point stores"),
                                   cl::Hidden);

// Emit consistency checks on floating-point return values (default on).
static cl::opt<bool> ClCheckRet("nsan-check-ret", cl::init(true),
                                cl::desc("Check floating-point return values"),
                                cl::Hidden);
124 
125 // LLVM may store constant floats as bitcasted ints.
126 // It's not really necessary to shadow such stores,
127 // if the shadow value is unknown the framework will re-extend it on load
128 // anyway. Moreover, because of size collisions (e.g. bf16 vs f16) it is
129 // impossible to determine the floating-point type based on the size.
130 // However, for debugging purposes it can be useful to model such stores.
131 static cl::opt<bool> ClPropagateNonFTConstStoresAsFT(
132     "nsan-propagate-non-ft-const-stores-as-ft",
133     cl::desc(
134         "Propagate non floating-point const stores as floating point values."
135         "For debugging purposes only"),
136     cl::Hidden);
137 
// Names of the module constructor and of the runtime init function it calls;
// both are passed to getOrCreateSanitizerCtorAndInitFunctions() in the pass
// entry point.
constexpr StringLiteral kNsanModuleCtorName("nsan.module_ctor");
constexpr StringLiteral kNsanInitName("__nsan_init");

// The following values must be kept in sync with the runtime.
// kShadowScale bounds the shadow type size relative to the application type
// size (see MappingConfig). The other three size the thread-local shadow
// return/argument buffers created in the NumericalStabilitySanitizer
// constructor.
constexpr int kShadowScale = 2;
constexpr int kMaxVectorWidth = 8;
constexpr int kMaxNumArgs = 128;
constexpr int kMaxShadowTypeSizeBytes = 16; // fp128
146 
147 namespace {
148 
// Defines the characteristics (nsan type id and LLVM type) attached to each
// possible shadow type. Concrete configurations derive from
// ShadowTypeConfigImpl and are created through fromNsanTypeId().
class ShadowTypeConfig {
public:
  // Factory: returns the configuration matching the one-character type id.
  static std::unique_ptr<ShadowTypeConfig> fromNsanTypeId(char TypeId);

  // The LLVM Type corresponding to the shadow type.
  virtual Type *getType(LLVMContext &Context) const = 0;

  // The nsan type id of the shadow type (`d`, `l`, `q`, ...).
  virtual char getNsanTypeId() const = 0;

  // Virtual so that configurations can be destroyed through a base pointer.
  virtual ~ShadowTypeConfig() = default;
};
163 
164 template <char NsanTypeId>
165 class ShadowTypeConfigImpl : public ShadowTypeConfig {
166 public:
167   char getNsanTypeId() const override { return NsanTypeId; }
168   static constexpr const char kNsanTypeId = NsanTypeId;
169 };
170 
171 // `double` (`d`) shadow type.
172 class F64ShadowConfig : public ShadowTypeConfigImpl<'d'> {
173   Type *getType(LLVMContext &Context) const override {
174     return Type::getDoubleTy(Context);
175   }
176 };
177 
178 // `x86_fp80` (`l`) shadow type: X86 long double.
179 class F80ShadowConfig : public ShadowTypeConfigImpl<'l'> {
180   Type *getType(LLVMContext &Context) const override {
181     return Type::getX86_FP80Ty(Context);
182   }
183 };
184 
185 // `fp128` (`q`) shadow type.
186 class F128ShadowConfig : public ShadowTypeConfigImpl<'q'> {
187   Type *getType(LLVMContext &Context) const override {
188     return Type::getFP128Ty(Context);
189   }
190 };
191 
192 // `ppc_fp128` (`e`) shadow type: IBM extended double with 106 bits of mantissa.
193 class PPC128ShadowConfig : public ShadowTypeConfigImpl<'e'> {
194   Type *getType(LLVMContext &Context) const override {
195     return Type::getPPC_FP128Ty(Context);
196   }
197 };
198 
199 // Creates a ShadowTypeConfig given its type id.
200 std::unique_ptr<ShadowTypeConfig>
201 ShadowTypeConfig::fromNsanTypeId(const char TypeId) {
202   switch (TypeId) {
203   case F64ShadowConfig::kNsanTypeId:
204     return std::make_unique<F64ShadowConfig>();
205   case F80ShadowConfig::kNsanTypeId:
206     return std::make_unique<F80ShadowConfig>();
207   case F128ShadowConfig::kNsanTypeId:
208     return std::make_unique<F128ShadowConfig>();
209   case PPC128ShadowConfig::kNsanTypeId:
210     return std::make_unique<PPC128ShadowConfig>();
211   }
212   report_fatal_error("nsan: invalid shadow type id '" + Twine(TypeId) + "'");
213 }
214 
// An enum corresponding to shadow value types. Used as indices in arrays, so
// not an `enum class`. `kNumValueTypes` is the element count, not a real value
// type; the helpers below return nullptr for it.
enum FTValueType { kFloat, kDouble, kLongDouble, kNumValueTypes };
218 
219 // If `FT` corresponds to a primitive FTValueType, return it.
220 static std::optional<FTValueType> ftValueTypeFromType(Type *FT) {
221   if (FT->isFloatTy())
222     return kFloat;
223   if (FT->isDoubleTy())
224     return kDouble;
225   if (FT->isX86_FP80Ty())
226     return kLongDouble;
227   return {};
228 }
229 
230 // Returns the LLVM type for an FTValueType.
231 static Type *typeFromFTValueType(FTValueType VT, LLVMContext &Context) {
232   switch (VT) {
233   case kFloat:
234     return Type::getFloatTy(Context);
235   case kDouble:
236     return Type::getDoubleTy(Context);
237   case kLongDouble:
238     return Type::getX86_FP80Ty(Context);
239   case kNumValueTypes:
240     return nullptr;
241   }
242   llvm_unreachable("Unhandled FTValueType enum");
243 }
244 
245 // Returns the type name for an FTValueType.
246 static const char *typeNameFromFTValueType(FTValueType VT) {
247   switch (VT) {
248   case kFloat:
249     return "float";
250   case kDouble:
251     return "double";
252   case kLongDouble:
253     return "longdouble";
254   case kNumValueTypes:
255     return nullptr;
256   }
257   llvm_unreachable("Unhandled FTValueType enum");
258 }
259 
260 // A specific mapping configuration of application type to shadow type for nsan
261 // (see -nsan-shadow-mapping flag).
262 class MappingConfig {
263 public:
264   explicit MappingConfig(LLVMContext &C) : Context(C) {
265     if (ClShadowMapping.size() != 3)
266       report_fatal_error("Invalid nsan mapping: " + Twine(ClShadowMapping));
267     unsigned ShadowTypeSizeBits[kNumValueTypes];
268     for (int VT = 0; VT < kNumValueTypes; ++VT) {
269       auto Config = ShadowTypeConfig::fromNsanTypeId(ClShadowMapping[VT]);
270       if (!Config)
271         report_fatal_error("Failed to get ShadowTypeConfig for " +
272                            Twine(ClShadowMapping[VT]));
273       const unsigned AppTypeSize =
274           typeFromFTValueType(static_cast<FTValueType>(VT), Context)
275               ->getScalarSizeInBits();
276       const unsigned ShadowTypeSize =
277           Config->getType(Context)->getScalarSizeInBits();
278       // Check that the shadow type size is at most kShadowScale times the
279       // application type size, so that shadow memory compoutations are valid.
280       if (ShadowTypeSize > kShadowScale * AppTypeSize)
281         report_fatal_error("Invalid nsan mapping f" + Twine(AppTypeSize) +
282                            "->f" + Twine(ShadowTypeSize) +
283                            ": The shadow type size should be at most " +
284                            Twine(kShadowScale) +
285                            " times the application type size");
286       ShadowTypeSizeBits[VT] = ShadowTypeSize;
287       Configs[VT] = std::move(Config);
288     }
289 
290     // Check that the mapping is monotonous. This is required because if one
291     // does an fpextend of `float->long double` in application code, nsan is
292     // going to do an fpextend of `shadow(float) -> shadow(long double)` in
293     // shadow code. This will fail in `qql` mode, since nsan would be
294     // fpextending `f128->long`, which is invalid.
295     // TODO: Relax this.
296     if (ShadowTypeSizeBits[kFloat] > ShadowTypeSizeBits[kDouble] ||
297         ShadowTypeSizeBits[kDouble] > ShadowTypeSizeBits[kLongDouble])
298       report_fatal_error("Invalid nsan mapping: { float->f" +
299                          Twine(ShadowTypeSizeBits[kFloat]) + "; double->f" +
300                          Twine(ShadowTypeSizeBits[kDouble]) +
301                          "; long double->f" +
302                          Twine(ShadowTypeSizeBits[kLongDouble]) + " }");
303   }
304 
305   const ShadowTypeConfig &byValueType(FTValueType VT) const {
306     assert(VT < FTValueType::kNumValueTypes && "invalid value type");
307     return *Configs[VT];
308   }
309 
310   // Returns the extended shadow type for a given application type.
311   Type *getExtendedFPType(Type *FT) const {
312     if (const auto VT = ftValueTypeFromType(FT))
313       return Configs[*VT]->getType(Context);
314     if (FT->isVectorTy()) {
315       auto *VecTy = cast<VectorType>(FT);
316       // TODO: add support for scalable vector types.
317       if (VecTy->isScalableTy())
318         return nullptr;
319       Type *ExtendedScalar = getExtendedFPType(VecTy->getElementType());
320       return ExtendedScalar
321                  ? VectorType::get(ExtendedScalar, VecTy->getElementCount())
322                  : nullptr;
323     }
324     return nullptr;
325   }
326 
327 private:
328   LLVMContext &Context;
329   std::unique_ptr<ShadowTypeConfig> Configs[FTValueType::kNumValueTypes];
330 };
331 
// The memory extents of a type specifies how many elements of a given
// FTValueType needs to be stored when storing this type.
struct MemoryExtents {
  // Scalar value type of each element.
  FTValueType ValueType;
  // Number of scalar elements (1 for a scalar, the lane count for a vector).
  uint64_t NumElts;
};
338 
339 static MemoryExtents getMemoryExtentsOrDie(Type *FT) {
340   if (const auto VT = ftValueTypeFromType(FT))
341     return {*VT, 1};
342   if (auto *VecTy = dyn_cast<VectorType>(FT)) {
343     const auto ScalarExtents = getMemoryExtentsOrDie(VecTy->getElementType());
344     return {ScalarExtents.ValueType,
345             ScalarExtents.NumElts * VecTy->getElementCount().getFixedValue()};
346   }
347   llvm_unreachable("invalid value type");
348 }
349 
350 // The location of a check. Passed as parameters to runtime checking functions.
351 class CheckLoc {
352 public:
353   // Creates a location that references an application memory location.
354   static CheckLoc makeStore(Value *Address) {
355     CheckLoc Result(kStore);
356     Result.Address = Address;
357     return Result;
358   }
359   static CheckLoc makeLoad(Value *Address) {
360     CheckLoc Result(kLoad);
361     Result.Address = Address;
362     return Result;
363   }
364 
365   // Creates a location that references an argument, given by id.
366   static CheckLoc makeArg(int ArgId) {
367     CheckLoc Result(kArg);
368     Result.ArgId = ArgId;
369     return Result;
370   }
371 
372   // Creates a location that references the return value of a function.
373   static CheckLoc makeRet() { return CheckLoc(kRet); }
374 
375   // Creates a location that references a vector insert.
376   static CheckLoc makeInsert() { return CheckLoc(kInsert); }
377 
378   // Returns the CheckType of location this refers to, as an integer-typed LLVM
379   // IR value.
380   Value *getType(LLVMContext &C) const {
381     return ConstantInt::get(Type::getInt32Ty(C), static_cast<int>(CheckTy));
382   }
383 
384   // Returns a CheckType-specific value representing details of the location
385   // (e.g. application address for loads or stores), as an `IntptrTy`-typed LLVM
386   // IR value.
387   Value *getValue(Type *IntptrTy, IRBuilder<> &Builder) const {
388     switch (CheckTy) {
389     case kUnknown:
390       llvm_unreachable("unknown type");
391     case kRet:
392     case kInsert:
393       return ConstantInt::get(IntptrTy, 0);
394     case kArg:
395       return ConstantInt::get(IntptrTy, ArgId);
396     case kLoad:
397     case kStore:
398       return Builder.CreatePtrToInt(Address, IntptrTy);
399     }
400     llvm_unreachable("Unhandled CheckType enum");
401   }
402 
403 private:
404   // Must be kept in sync with the runtime,
405   // see compiler-rt/lib/nsan/nsan_stats.h
406   enum CheckType {
407     kUnknown = 0,
408     kRet,
409     kArg,
410     kLoad,
411     kStore,
412     kInsert,
413   };
414   explicit CheckLoc(CheckType CheckTy) : CheckTy(CheckTy) {}
415 
416   Value *Address = nullptr;
417   const CheckType CheckTy;
418   int ArgId = -1;
419 };
420 
421 // A map of LLVM IR values to shadow LLVM IR values.
422 class ValueToShadowMap {
423 public:
424   explicit ValueToShadowMap(const MappingConfig &Config) : Config(Config) {}
425 
426   ValueToShadowMap(const ValueToShadowMap &) = delete;
427   ValueToShadowMap &operator=(const ValueToShadowMap &) = delete;
428 
429   // Sets the shadow value for a value. Asserts that the value does not already
430   // have a value.
431   void setShadow(Value &V, Value &Shadow) {
432     [[maybe_unused]] const bool Inserted = Map.try_emplace(&V, &Shadow).second;
433     LLVM_DEBUG({
434       if (!Inserted) {
435         if (auto *I = dyn_cast<Instruction>(&V))
436           errs() << I->getFunction()->getName() << ": ";
437         errs() << "duplicate shadow (" << &V << "): ";
438         V.dump();
439       }
440     });
441     assert(Inserted && "duplicate shadow");
442   }
443 
444   // Returns true if the value already has a shadow (including if the value is a
445   // constant). If true, calling getShadow() is valid.
446   bool hasShadow(Value *V) const {
447     return isa<Constant>(V) || (Map.find(V) != Map.end());
448   }
449 
450   // Returns the shadow value for a given value. Asserts that the value has
451   // a shadow value. Lazily creates shadows for constant values.
452   Value *getShadow(Value *V) const {
453     if (Constant *C = dyn_cast<Constant>(V))
454       return getShadowConstant(C);
455     return Map.find(V)->second;
456   }
457 
458   bool empty() const { return Map.empty(); }
459 
460 private:
461   // Extends a constant application value to its shadow counterpart.
462   APFloat extendConstantFP(APFloat CV, const fltSemantics &To) const {
463     bool LosesInfo = false;
464     CV.convert(To, APFloatBase::rmTowardZero, &LosesInfo);
465     return CV;
466   }
467 
468   // Returns the shadow constant for the given application constant.
469   Constant *getShadowConstant(Constant *C) const {
470     if (UndefValue *U = dyn_cast<UndefValue>(C)) {
471       return UndefValue::get(Config.getExtendedFPType(U->getType()));
472     }
473     if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
474       // Floating-point constants.
475       Type *Ty = Config.getExtendedFPType(CFP->getType());
476       return ConstantFP::get(
477           Ty, extendConstantFP(CFP->getValueAPF(), Ty->getFltSemantics()));
478     }
479     // Vector, array, or aggregate constants.
480     if (C->getType()->isVectorTy()) {
481       SmallVector<Constant *, 8> Elements;
482       for (int I = 0, E = cast<VectorType>(C->getType())
483                               ->getElementCount()
484                               .getFixedValue();
485            I < E; ++I)
486         Elements.push_back(getShadowConstant(C->getAggregateElement(I)));
487       return ConstantVector::get(Elements);
488     }
489     llvm_unreachable("unimplemented");
490   }
491 
492   const MappingConfig &Config;
493   DenseMap<Value *, Value *> Map;
494 };
495 
// A family of runtime memory-operation callbacks: one generic fallback that
// takes an explicit size, plus size-specialized variants.
class NsanMemOpFn {
public:
  // Declares the functions in `M`. `Sized` lists the names of the
  // size-specialized variants, `Fallback` names the generic one, and `NumArgs`
  // is the fallback's parameter count (2 or 3).
  NsanMemOpFn(Module &M, ArrayRef<StringRef> Sized, StringRef Fallback,
              size_t NumArgs);
  // Returns the variant specialized for `MemOpSize`, or the fallback if there
  // is none for that size.
  FunctionCallee getFunctionFor(uint64_t MemOpSize) const;
  // Returns the generic fallback function.
  FunctionCallee getFallback() const;

private:
  // Index 0 holds the fallback; the sized variants follow in `Sized` order.
  SmallVector<FunctionCallee> Funcs;
  // Number of sized variants, i.e. Funcs.size() - 1.
  size_t NumSizedFuncs;
};
507 
508 NsanMemOpFn::NsanMemOpFn(Module &M, ArrayRef<StringRef> Sized,
509                          StringRef Fallback, size_t NumArgs) {
510   LLVMContext &Ctx = M.getContext();
511   AttributeList Attr;
512   Attr = Attr.addFnAttribute(Ctx, Attribute::NoUnwind);
513   Type *PtrTy = PointerType::getUnqual(Ctx);
514   Type *VoidTy = Type::getVoidTy(Ctx);
515   IntegerType *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
516   FunctionType *SizedFnTy = nullptr;
517 
518   NumSizedFuncs = Sized.size();
519 
520   // First entry is fallback function
521   if (NumArgs == 3) {
522     Funcs.push_back(
523         M.getOrInsertFunction(Fallback, Attr, VoidTy, PtrTy, PtrTy, IntptrTy));
524     SizedFnTy = FunctionType::get(VoidTy, {PtrTy, PtrTy}, false);
525   } else if (NumArgs == 2) {
526     Funcs.push_back(
527         M.getOrInsertFunction(Fallback, Attr, VoidTy, PtrTy, IntptrTy));
528     SizedFnTy = FunctionType::get(VoidTy, {PtrTy}, false);
529   } else {
530     llvm_unreachable("Unexpected value of sized functions arguments");
531   }
532 
533   for (size_t i = 0; i < NumSizedFuncs; ++i)
534     Funcs.push_back(M.getOrInsertFunction(Sized[i], SizedFnTy, Attr));
535 }
536 
537 FunctionCallee NsanMemOpFn::getFunctionFor(uint64_t MemOpSize) const {
538   // Now `getFunctionFor` operates on `Funcs` of size 4 (at least) and the
539   // following code assumes that the number of functions in `Func` is sufficient
540   assert(NumSizedFuncs >= 3 && "Unexpected number of sized functions");
541 
542   size_t Idx =
543       MemOpSize == 4 ? 1 : (MemOpSize == 8 ? 2 : (MemOpSize == 16 ? 3 : 0));
544 
545   return Funcs[Idx];
546 }
547 
548 FunctionCallee NsanMemOpFn::getFallback() const { return Funcs[0]; }
549 
/// Instantiating NumericalStabilitySanitizer inserts the nsan runtime library
/// API function declarations into the module if they don't exist already.
/// Instantiating ensures the __nsan_init function is in the list of global
/// constructors for the module.
class NumericalStabilitySanitizer {
public:
  // Declares the runtime functions and thread-local globals in `M` and
  // compiles the optional function-name filter.
  NumericalStabilitySanitizer(Module &M);
  // Per-function entry point, called once for each function of the module by
  // the pass.
  bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);

private:
  bool instrumentMemIntrinsic(MemIntrinsic *MI);
  void maybeAddSuffixForNsanInterface(CallBase *CI);
  // Returns true if `Addr` (possibly through one GEP) is a constant global.
  bool addrPointsToConstantData(Value *Addr);
  void maybeCreateShadowValue(Instruction &Root, const TargetLibraryInfo &TLI,
                              ValueToShadowMap &Map);
  Value *createShadowValueWithOperandsAvailable(Instruction &Inst,
                                                const TargetLibraryInfo &TLI,
                                                const ValueToShadowMap &Map);
  PHINode *maybeCreateShadowPhi(PHINode &Phi, const TargetLibraryInfo &TLI);
  // Emits, at function entry, the shadow values of the FT arguments of `F`
  // and records them in `Map`.
  void createShadowArguments(Function &F, const TargetLibraryInfo &TLI,
                             ValueToShadowMap &Map);

  void populateShadowStack(CallBase &CI, const TargetLibraryInfo &TLI,
                           const ValueToShadowMap &Map);

  void propagateShadowValues(Instruction &Inst, const TargetLibraryInfo &TLI,
                             const ValueToShadowMap &Map);
  Value *emitCheck(Value *V, Value *ShadowV, IRBuilder<> &Builder,
                   CheckLoc Loc);
  Value *emitCheckInternal(Value *V, Value *ShadowV, IRBuilder<> &Builder,
                           CheckLoc Loc);
  void emitFCmpCheck(FCmpInst &FCmp, const ValueToShadowMap &Map);

  // Value creation handlers.
  Value *handleLoad(LoadInst &Load, Type *VT, Type *ExtendedVT);
  Value *handleCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
                        const TargetLibraryInfo &TLI,
                        const ValueToShadowMap &Map, IRBuilder<> &Builder);
  Value *maybeHandleKnownCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
                                  const TargetLibraryInfo &TLI,
                                  const ValueToShadowMap &Map,
                                  IRBuilder<> &Builder);
  Value *handleTrunc(const FPTruncInst &Trunc, Type *VT, Type *ExtendedVT,
                     const ValueToShadowMap &Map, IRBuilder<> &Builder);
  Value *handleExt(const FPExtInst &Ext, Type *VT, Type *ExtendedVT,
                   const ValueToShadowMap &Map, IRBuilder<> &Builder);

  // Value propagation handlers.
  void propagateFTStore(StoreInst &Store, Type *VT, Type *ExtendedVT,
                        const ValueToShadowMap &Map);
  void propagateNonFTStore(StoreInst &Store, Type *VT,
                           const ValueToShadowMap &Map);

  const DataLayout &DL;
  LLVMContext &Context;
  // Application type -> shadow type mapping, built from ClShadowMapping.
  MappingConfig Config;
  // Pointer-sized integer type of the module; set in the constructor body.
  IntegerType *IntptrTy = nullptr;

  // Runtime entry points that exist once per application value type; each
  // array is indexed by FTValueType.
  // TODO: Use std::array instead?
  FunctionCallee NsanGetShadowPtrForStore[FTValueType::kNumValueTypes] = {};
  FunctionCallee NsanGetShadowPtrForLoad[FTValueType::kNumValueTypes] = {};
  FunctionCallee NsanCheckValue[FTValueType::kNumValueTypes] = {};
  FunctionCallee NsanFCmpFail[FTValueType::kNumValueTypes] = {};

  // `__nsan_copy_*` and `__nsan_set_value_unknown*` function families
  // (fallback plus 4/8/16-byte variants).
  NsanMemOpFn NsanCopyFns;
  NsanMemOpFn NsanSetUnknownFns;

  FunctionCallee NsanGetRawShadowTypePtr;
  FunctionCallee NsanGetRawShadowPtr;

  // Thread-local runtime globals for the shadow return value: an
  // intptr-sized tag and a byte buffer of type NsanShadowRetType.
  GlobalValue *NsanShadowRetTag = nullptr;

  Type *NsanShadowRetType = nullptr;
  GlobalValue *NsanShadowRetPtr = nullptr;

  // Thread-local runtime globals for shadow arguments: an intptr-sized tag
  // and a byte buffer of type NsanShadowArgsType.
  GlobalValue *NsanShadowArgsTag = nullptr;

  Type *NsanShadowArgsType = nullptr;
  GlobalValue *NsanShadowArgsPtr = nullptr;

  // Compiled form of ClCheckFunctionsFilter; empty when the option is unset.
  std::optional<Regex> CheckFunctionsFilter;
};
631 } // end anonymous namespace
632 
633 PreservedAnalyses
634 NumericalStabilitySanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) {
635   getOrCreateSanitizerCtorAndInitFunctions(
636       M, kNsanModuleCtorName, kNsanInitName, /*InitArgTypes=*/{},
637       /*InitArgs=*/{},
638       // This callback is invoked when the functions are created the first
639       // time. Hook them into the global ctors list in that case:
640       [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); });
641 
642   NumericalStabilitySanitizer Nsan(M);
643   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
644   for (Function &F : M)
645     Nsan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F));
646 
647   return PreservedAnalyses::none();
648 }
649 
650 static GlobalValue *createThreadLocalGV(const char *Name, Module &M, Type *Ty) {
651   return dyn_cast<GlobalValue>(M.getOrInsertGlobal(Name, Ty, [&M, Ty, Name] {
652     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
653                               nullptr, Name, nullptr,
654                               GlobalVariable::InitialExecTLSModel);
655   }));
656 }
657 
658 NumericalStabilitySanitizer::NumericalStabilitySanitizer(Module &M)
659     : DL(M.getDataLayout()), Context(M.getContext()), Config(Context),
660       NsanCopyFns(M, {"__nsan_copy_4", "__nsan_copy_8", "__nsan_copy_16"},
661                   "__nsan_copy_values", /*NumArgs=*/3),
662       NsanSetUnknownFns(M,
663                         {"__nsan_set_value_unknown_4",
664                          "__nsan_set_value_unknown_8",
665                          "__nsan_set_value_unknown_16"},
666                         "__nsan_set_value_unknown", /*NumArgs=*/2) {
667   IntptrTy = DL.getIntPtrType(Context);
668   Type *PtrTy = PointerType::getUnqual(Context);
669   Type *Int32Ty = Type::getInt32Ty(Context);
670   Type *Int1Ty = Type::getInt1Ty(Context);
671   Type *VoidTy = Type::getVoidTy(Context);
672 
673   AttributeList Attr;
674   Attr = Attr.addFnAttribute(Context, Attribute::NoUnwind);
675   // Initialize the runtime values (functions and global variables).
676   for (int I = 0; I < kNumValueTypes; ++I) {
677     const FTValueType VT = static_cast<FTValueType>(I);
678     const char *VTName = typeNameFromFTValueType(VT);
679     Type *VTTy = typeFromFTValueType(VT, Context);
680 
681     // Load/store.
682     const std::string GetterPrefix =
683         std::string("__nsan_get_shadow_ptr_for_") + VTName;
684     NsanGetShadowPtrForStore[VT] = M.getOrInsertFunction(
685         GetterPrefix + "_store", Attr, PtrTy, PtrTy, IntptrTy);
686     NsanGetShadowPtrForLoad[VT] = M.getOrInsertFunction(
687         GetterPrefix + "_load", Attr, PtrTy, PtrTy, IntptrTy);
688 
689     // Check.
690     const auto &ShadowConfig = Config.byValueType(VT);
691     Type *ShadowTy = ShadowConfig.getType(Context);
692     NsanCheckValue[VT] =
693         M.getOrInsertFunction(std::string("__nsan_internal_check_") + VTName +
694                                   "_" + ShadowConfig.getNsanTypeId(),
695                               Attr, Int32Ty, VTTy, ShadowTy, Int32Ty, IntptrTy);
696     NsanFCmpFail[VT] = M.getOrInsertFunction(
697         std::string("__nsan_fcmp_fail_") + VTName + "_" +
698             ShadowConfig.getNsanTypeId(),
699         Attr, VoidTy, VTTy, VTTy, ShadowTy, ShadowTy, Int32Ty, Int1Ty, Int1Ty);
700   }
701 
702   // TODO: Add attributes nofree, nosync, readnone, readonly,
703   NsanGetRawShadowTypePtr = M.getOrInsertFunction(
704       "__nsan_internal_get_raw_shadow_type_ptr", Attr, PtrTy, PtrTy);
705   NsanGetRawShadowPtr = M.getOrInsertFunction(
706       "__nsan_internal_get_raw_shadow_ptr", Attr, PtrTy, PtrTy);
707 
708   NsanShadowRetTag = createThreadLocalGV("__nsan_shadow_ret_tag", M, IntptrTy);
709 
710   NsanShadowRetType = ArrayType::get(Type::getInt8Ty(Context),
711                                      kMaxVectorWidth * kMaxShadowTypeSizeBytes);
712   NsanShadowRetPtr =
713       createThreadLocalGV("__nsan_shadow_ret_ptr", M, NsanShadowRetType);
714 
715   NsanShadowArgsTag =
716       createThreadLocalGV("__nsan_shadow_args_tag", M, IntptrTy);
717 
718   NsanShadowArgsType =
719       ArrayType::get(Type::getInt8Ty(Context),
720                      kMaxVectorWidth * kMaxNumArgs * kMaxShadowTypeSizeBytes);
721 
722   NsanShadowArgsPtr =
723       createThreadLocalGV("__nsan_shadow_args_ptr", M, NsanShadowArgsType);
724 
725   if (!ClCheckFunctionsFilter.empty()) {
726     Regex R = Regex(ClCheckFunctionsFilter);
727     std::string RegexError;
728     assert(R.isValid(RegexError));
729     CheckFunctionsFilter = std::move(R);
730   }
731 }
732 
733 // Returns true if the given LLVM Value points to constant data (typically, a
734 // global variable reference).
735 bool NumericalStabilitySanitizer::addrPointsToConstantData(Value *Addr) {
736   // If this is a GEP, just analyze its pointer operand.
737   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr))
738     Addr = GEP->getPointerOperand();
739 
740   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr))
741     return GV->isConstant();
742   return false;
743 }
744 
745 // This instruments the function entry to create shadow arguments.
746 // Pseudocode:
747 //   if (this_fn_ptr == __nsan_shadow_args_tag) {
748 //     s(arg0) = LOAD<sizeof(arg0)>(__nsan_shadow_args);
749 //     s(arg1) = LOAD<sizeof(arg1)>(__nsan_shadow_args + sizeof(arg0));
750 //     ...
751 //     __nsan_shadow_args_tag = 0;
752 //   } else {
753 //     s(arg0) = fext(arg0);
754 //     s(arg1) = fext(arg1);
755 //     ...
756 //   }
757 void NumericalStabilitySanitizer::createShadowArguments(
758     Function &F, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
759   assert(!F.getIntrinsicID() && "found a definition of an intrinsic");
760 
761   // Do not bother if there are no FP args.
762   if (all_of(F.args(), [this](const Argument &Arg) {
763         return Config.getExtendedFPType(Arg.getType()) == nullptr;
764       }))
765     return;
766 
767   IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHI());
768   // The function has shadow args if the shadow args tag matches the function
769   // address.
770   Value *HasShadowArgs = Builder.CreateICmpEQ(
771       Builder.CreateLoad(IntptrTy, NsanShadowArgsTag, /*isVolatile=*/false),
772       Builder.CreatePtrToInt(&F, IntptrTy));
773 
774   unsigned ShadowArgsOffsetBytes = 0;
775   for (Argument &Arg : F.args()) {
776     Type *VT = Arg.getType();
777     Type *ExtendedVT = Config.getExtendedFPType(VT);
778     if (ExtendedVT == nullptr)
779       continue; // Not an FT value.
780     Value *L = Builder.CreateAlignedLoad(
781         ExtendedVT,
782         Builder.CreateConstGEP2_64(NsanShadowArgsType, NsanShadowArgsPtr, 0,
783                                    ShadowArgsOffsetBytes),
784         Align(1), /*isVolatile=*/false);
785     Value *Shadow = Builder.CreateSelect(HasShadowArgs, L,
786                                          Builder.CreateFPExt(&Arg, ExtendedVT));
787     Map.setShadow(Arg, *Shadow);
788     TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT);
789     assert(!SlotSize.isScalable() && "unsupported");
790     ShadowArgsOffsetBytes += SlotSize;
791   }
792   Builder.CreateStore(ConstantInt::get(IntptrTy, 0), NsanShadowArgsTag);
793 }
794 
795 // Returns true if the instrumentation should emit code to check arguments
796 // before a function call.
797 static bool shouldCheckArgs(CallBase &CI, const TargetLibraryInfo &TLI,
798                             const std::optional<Regex> &CheckFunctionsFilter) {
799 
800   Function *Fn = CI.getCalledFunction();
801 
802   if (CheckFunctionsFilter) {
803     // Skip checking args of indirect calls.
804     if (Fn == nullptr)
805       return false;
806     if (CheckFunctionsFilter->match(Fn->getName()))
807       return true;
808     return false;
809   }
810 
811   if (Fn == nullptr)
812     return true; // Always check args of indirect calls.
813 
814   // Never check nsan functions, the user called them for a reason.
815   if (Fn->getName().starts_with("__nsan_"))
816     return false;
817 
818   const auto ID = Fn->getIntrinsicID();
819   LibFunc LFunc = LibFunc::NumLibFuncs;
820   // Always check args of unknown functions.
821   if (ID == Intrinsic::ID() && !TLI.getLibFunc(*Fn, LFunc))
822     return true;
823 
824   // Do not check args of an `fabs` call that is used for a comparison.
825   // This is typically used for `fabs(a-b) < tolerance`, where what matters is
826   // the result of the comparison, which is already caught be the fcmp checks.
827   if (ID == Intrinsic::fabs || LFunc == LibFunc_fabsf ||
828       LFunc == LibFunc_fabs || LFunc == LibFunc_fabsl)
829     for (const auto &U : CI.users())
830       if (isa<CmpInst>(U))
831         return false;
832 
833   return true; // Default is check.
834 }
835 
836 // Populates the shadow call stack (which contains shadow values for every
837 // floating-point parameter to the function).
838 void NumericalStabilitySanitizer::populateShadowStack(
839     CallBase &CI, const TargetLibraryInfo &TLI, const ValueToShadowMap &Map) {
840   // Do not create a shadow stack for inline asm.
841   if (CI.isInlineAsm())
842     return;
843 
844   // Do not bother if there are no FP args.
845   if (all_of(CI.operands(), [this](const Value *Arg) {
846         return Config.getExtendedFPType(Arg->getType()) == nullptr;
847       }))
848     return;
849 
850   IRBuilder<> Builder(&CI);
851   SmallVector<Value *, 8> ArgShadows;
852   const bool ShouldCheckArgs = shouldCheckArgs(CI, TLI, CheckFunctionsFilter);
853   for (auto [ArgIdx, Arg] : enumerate(CI.operands())) {
854     if (Config.getExtendedFPType(Arg->getType()) == nullptr)
855       continue; // Not an FT value.
856     Value *ArgShadow = Map.getShadow(Arg);
857     ArgShadows.push_back(ShouldCheckArgs ? emitCheck(Arg, ArgShadow, Builder,
858                                                      CheckLoc::makeArg(ArgIdx))
859                                          : ArgShadow);
860   }
861 
862   // Do not create shadow stacks for intrinsics/known lib funcs.
863   if (Function *Fn = CI.getCalledFunction()) {
864     LibFunc LFunc;
865     if (Fn->isIntrinsic() || TLI.getLibFunc(*Fn, LFunc))
866       return;
867   }
868 
869   // Set the shadow stack tag.
870   Builder.CreateStore(CI.getCalledOperand(), NsanShadowArgsTag);
871   TypeSize ShadowArgsOffsetBytes = TypeSize::getFixed(0);
872 
873   unsigned ShadowArgId = 0;
874   for (const Value *Arg : CI.operands()) {
875     Type *VT = Arg->getType();
876     Type *ExtendedVT = Config.getExtendedFPType(VT);
877     if (ExtendedVT == nullptr)
878       continue; // Not an FT value.
879     Builder.CreateAlignedStore(
880         ArgShadows[ShadowArgId++],
881         Builder.CreateConstGEP2_64(NsanShadowArgsType, NsanShadowArgsPtr, 0,
882                                    ShadowArgsOffsetBytes),
883         Align(1), /*isVolatile=*/false);
884     TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT);
885     assert(!SlotSize.isScalable() && "unsupported");
886     ShadowArgsOffsetBytes += SlotSize;
887   }
888 }
889 
// Result of the internal part of emitCheck() (see emitCheckInternal()): a
// value that indicates whether computation should continue with the shadow or
// resume by re-fextending the original value.
// The numeric values are emitted as i32 constants in the instrumented code and
// compared against the result of the runtime check functions.
enum class ContinuationType { // Keep in sync with runtime.
  // Keep using the existing shadow value.
  ContinueWithShadow = 0,
  // Discard the shadow and recompute it by extending the original value.
  ResumeFromValue = 1,
};
897 
898 Value *NumericalStabilitySanitizer::emitCheckInternal(Value *V, Value *ShadowV,
899                                                       IRBuilder<> &Builder,
900                                                       CheckLoc Loc) {
901   // Do not emit checks for constant values, this is redundant.
902   if (isa<Constant>(V))
903     return ConstantInt::get(
904         Builder.getInt32Ty(),
905         static_cast<int>(ContinuationType::ContinueWithShadow));
906 
907   Type *Ty = V->getType();
908   if (const auto VT = ftValueTypeFromType(Ty))
909     return Builder.CreateCall(
910         NsanCheckValue[*VT],
911         {V, ShadowV, Loc.getType(Context), Loc.getValue(IntptrTy, Builder)});
912 
913   if (Ty->isVectorTy()) {
914     auto *VecTy = cast<VectorType>(Ty);
915     // We currently skip scalable vector types in MappingConfig,
916     // thus we should not encounter any such types here.
917     assert(!VecTy->isScalableTy() &&
918            "Scalable vector types are not supported yet");
919     Value *CheckResult = nullptr;
920     for (int I = 0, E = VecTy->getElementCount().getFixedValue(); I < E; ++I) {
921       // We resume if any element resumes. Another option would be to create a
922       // vector shuffle with the array of ContinueWithShadow, but that is too
923       // complex.
924       Value *ExtractV = Builder.CreateExtractElement(V, I);
925       Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I);
926       Value *ComponentCheckResult =
927           emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
928       CheckResult = CheckResult
929                         ? Builder.CreateOr(CheckResult, ComponentCheckResult)
930                         : ComponentCheckResult;
931     }
932     return CheckResult;
933   }
934   if (Ty->isArrayTy()) {
935     Value *CheckResult = nullptr;
936     for (auto I : seq(Ty->getArrayNumElements())) {
937       Value *ExtractV = Builder.CreateExtractElement(V, I);
938       Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I);
939       Value *ComponentCheckResult =
940           emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
941       CheckResult = CheckResult
942                         ? Builder.CreateOr(CheckResult, ComponentCheckResult)
943                         : ComponentCheckResult;
944     }
945     return CheckResult;
946   }
947   if (Ty->isStructTy()) {
948     Value *CheckResult = nullptr;
949     for (auto I : seq(Ty->getStructNumElements())) {
950       if (Config.getExtendedFPType(Ty->getStructElementType(I)) == nullptr)
951         continue; // Only check FT values.
952       Value *ExtractV = Builder.CreateExtractValue(V, I);
953       Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I);
954       Value *ComponentCheckResult =
955           emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
956       CheckResult = CheckResult
957                         ? Builder.CreateOr(CheckResult, ComponentCheckResult)
958                         : ComponentCheckResult;
959     }
960     if (!CheckResult)
961       return ConstantInt::get(
962           Builder.getInt32Ty(),
963           static_cast<int>(ContinuationType::ContinueWithShadow));
964     return CheckResult;
965   }
966 
967   llvm_unreachable("not implemented");
968 }
969 
970 // Inserts a runtime check of V against its shadow value ShadowV.
971 // We check values whenever they escape: on return, call, stores, and
972 // insertvalue.
973 // Returns the shadow value that should be used to continue the computations,
974 // depending on the answer from the runtime.
975 // TODO: Should we check on select ? phi ?
976 Value *NumericalStabilitySanitizer::emitCheck(Value *V, Value *ShadowV,
977                                               IRBuilder<> &Builder,
978                                               CheckLoc Loc) {
979   // Do not emit checks for constant values, this is redundant.
980   if (isa<Constant>(V))
981     return ShadowV;
982 
983   if (Instruction *Inst = dyn_cast<Instruction>(V)) {
984     Function *F = Inst->getFunction();
985     if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName())) {
986       return ShadowV;
987     }
988   }
989 
990   Value *CheckResult = emitCheckInternal(V, ShadowV, Builder, Loc);
991   Value *ICmpEQ = Builder.CreateICmpEQ(
992       CheckResult,
993       ConstantInt::get(Builder.getInt32Ty(),
994                        static_cast<int>(ContinuationType::ResumeFromValue)));
995   return Builder.CreateSelect(
996       ICmpEQ, Builder.CreateFPExt(V, Config.getExtendedFPType(V->getType())),
997       ShadowV);
998 }
999 
1000 // Inserts a check that fcmp on shadow values are consistent with that on base
1001 // values.
1002 void NumericalStabilitySanitizer::emitFCmpCheck(FCmpInst &FCmp,
1003                                                 const ValueToShadowMap &Map) {
1004   if (!ClInstrumentFCmp)
1005     return;
1006 
1007   Function *F = FCmp.getFunction();
1008   if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName()))
1009     return;
1010 
1011   Value *LHS = FCmp.getOperand(0);
1012   if (Config.getExtendedFPType(LHS->getType()) == nullptr)
1013     return;
1014   Value *RHS = FCmp.getOperand(1);
1015 
1016   // Split the basic block. On mismatch, we'll jump to the new basic block with
1017   // a call to the runtime for error reporting.
1018   BasicBlock *FCmpBB = FCmp.getParent();
1019   BasicBlock *NextBB = FCmpBB->splitBasicBlock(FCmp.getNextNode());
1020   // Remove the newly created terminator unconditional branch.
1021   FCmpBB->back().eraseFromParent();
1022   BasicBlock *FailBB =
1023       BasicBlock::Create(Context, "", FCmpBB->getParent(), NextBB);
1024 
1025   // Create the shadow fcmp and comparison between the fcmps.
1026   IRBuilder<> FCmpBuilder(FCmpBB);
1027   FCmpBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
1028   Value *ShadowLHS = Map.getShadow(LHS);
1029   Value *ShadowRHS = Map.getShadow(RHS);
1030   // See comment on ClTruncateFCmpEq.
1031   if (FCmp.isEquality() && ClTruncateFCmpEq) {
1032     Type *Ty = ShadowLHS->getType();
1033     ShadowLHS = FCmpBuilder.CreateFPExt(
1034         FCmpBuilder.CreateFPTrunc(ShadowLHS, LHS->getType()), Ty);
1035     ShadowRHS = FCmpBuilder.CreateFPExt(
1036         FCmpBuilder.CreateFPTrunc(ShadowRHS, RHS->getType()), Ty);
1037   }
1038   Value *ShadowFCmp =
1039       FCmpBuilder.CreateFCmp(FCmp.getPredicate(), ShadowLHS, ShadowRHS);
1040   Value *OriginalAndShadowFcmpMatch =
1041       FCmpBuilder.CreateICmpEQ(&FCmp, ShadowFCmp);
1042 
1043   if (OriginalAndShadowFcmpMatch->getType()->isVectorTy()) {
1044     // If we have a vector type, `OriginalAndShadowFcmpMatch` is a vector of i1,
1045     // where an element is true if the corresponding elements in original and
1046     // shadow are the same. We want all elements to be 1.
1047     OriginalAndShadowFcmpMatch =
1048         FCmpBuilder.CreateAndReduce(OriginalAndShadowFcmpMatch);
1049   }
1050 
1051   // Use MDBuilder(*C).createLikelyBranchWeights() because "match" is the common
1052   // case.
1053   FCmpBuilder.CreateCondBr(OriginalAndShadowFcmpMatch, NextBB, FailBB,
1054                            MDBuilder(Context).createLikelyBranchWeights());
1055 
1056   // Fill in FailBB.
1057   IRBuilder<> FailBuilder(FailBB);
1058   FailBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
1059 
1060   const auto EmitFailCall = [this, &FCmp, &FCmpBuilder,
1061                              &FailBuilder](Value *L, Value *R, Value *ShadowL,
1062                                            Value *ShadowR, Value *Result,
1063                                            Value *ShadowResult) {
1064     Type *FT = L->getType();
1065     FunctionCallee *Callee = nullptr;
1066     if (FT->isFloatTy()) {
1067       Callee = &(NsanFCmpFail[kFloat]);
1068     } else if (FT->isDoubleTy()) {
1069       Callee = &(NsanFCmpFail[kDouble]);
1070     } else if (FT->isX86_FP80Ty()) {
1071       // TODO: make NsanFCmpFailLongDouble work.
1072       Callee = &(NsanFCmpFail[kDouble]);
1073       L = FailBuilder.CreateFPTrunc(L, Type::getDoubleTy(Context));
1074       R = FailBuilder.CreateFPTrunc(L, Type::getDoubleTy(Context));
1075     } else {
1076       llvm_unreachable("not implemented");
1077     }
1078     FailBuilder.CreateCall(*Callee, {L, R, ShadowL, ShadowR,
1079                                      ConstantInt::get(FCmpBuilder.getInt32Ty(),
1080                                                       FCmp.getPredicate()),
1081                                      Result, ShadowResult});
1082   };
1083   if (LHS->getType()->isVectorTy()) {
1084     for (int I = 0, E = cast<VectorType>(LHS->getType())
1085                             ->getElementCount()
1086                             .getFixedValue();
1087          I < E; ++I) {
1088       Value *ExtractLHS = FailBuilder.CreateExtractElement(LHS, I);
1089       Value *ExtractRHS = FailBuilder.CreateExtractElement(RHS, I);
1090       Value *ExtractShaodwLHS = FailBuilder.CreateExtractElement(ShadowLHS, I);
1091       Value *ExtractShaodwRHS = FailBuilder.CreateExtractElement(ShadowRHS, I);
1092       Value *ExtractFCmp = FailBuilder.CreateExtractElement(&FCmp, I);
1093       Value *ExtractShadowFCmp =
1094           FailBuilder.CreateExtractElement(ShadowFCmp, I);
1095       EmitFailCall(ExtractLHS, ExtractRHS, ExtractShaodwLHS, ExtractShaodwRHS,
1096                    ExtractFCmp, ExtractShadowFCmp);
1097     }
1098   } else {
1099     EmitFailCall(LHS, RHS, ShadowLHS, ShadowRHS, &FCmp, ShadowFCmp);
1100   }
1101   FailBuilder.CreateBr(NextBB);
1102 
1103   ++NumInstrumentedFCmp;
1104 }
1105 
1106 // Creates a shadow phi value for any phi that defines a value of FT type.
1107 PHINode *NumericalStabilitySanitizer::maybeCreateShadowPhi(
1108     PHINode &Phi, const TargetLibraryInfo &TLI) {
1109   Type *VT = Phi.getType();
1110   Type *ExtendedVT = Config.getExtendedFPType(VT);
1111   if (ExtendedVT == nullptr)
1112     return nullptr; // Not an FT value.
1113   // The phi operands are shadow values and are not available when the phi is
1114   // created. They will be populated in a final phase, once all shadow values
1115   // have been created.
1116   PHINode *Shadow = PHINode::Create(ExtendedVT, Phi.getNumIncomingValues());
1117   Shadow->insertAfter(&Phi);
1118   return Shadow;
1119 }
1120 
// Creates the shadow value for the FT load `Load` (of type `VT`, with shadow
// type `ExtendedVT`) and returns it.
//  - Loads from constant data are simply extended.
//  - Otherwise the runtime is asked for the shadow address of the loaded
//    memory. The block is split after the load: if the returned pointer is
//    null the shadow is the extension of the loaded value, else the shadow is
//    loaded from shadow memory (and optionally checked against the loaded
//    value). The returned value is a phi merging both paths.
Value *NumericalStabilitySanitizer::handleLoad(LoadInst &Load, Type *VT,
                                               Type *ExtendedVT) {
  // All instrumentation is inserted right after the load.
  IRBuilder<> Builder(Load.getNextNode());
  Builder.SetCurrentDebugLocation(Load.getDebugLoc());
  if (addrPointsToConstantData(Load.getPointerOperand())) {
    // No need to look into the shadow memory, the value is a constant. Just
    // convert from FT to 2FT.
    return Builder.CreateFPExt(&Load, ExtendedVT);
  }

  // if (%shadowptr == nullptr)
  //    %shadow = fpext %v
  // else
  //    %shadow = load (ptrcast %shadow_ptr))
  // Considered options here:
  //  - Have `NsanGetShadowPtrForLoad` return a fixed address
  //    &__nsan_unknown_value_shadow_address that is valid to load from, and
  //    use a select. This has the advantage that the generated IR is simpler.
  //  - Have `NsanGetShadowPtrForLoad` return nullptr.  Because `select` does
  //    not short-circuit, dereferencing the returned pointer is no longer an
  //    option, have to split and create a separate basic block. This has the
  //    advantage of being easier to debug because it crashes if we ever mess
  //    up.
  // The code below implements the second option (null check + block split).

  const auto Extents = getMemoryExtentsOrDie(VT);
  Value *ShadowPtr = Builder.CreateCall(
      NsanGetShadowPtrForLoad[Extents.ValueType],
      {Load.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
  ++NumInstrumentedFTLoads;

  // Split the basic block.
  BasicBlock *LoadBB = Load.getParent();
  BasicBlock *NextBB = LoadBB->splitBasicBlock(Builder.GetInsertPoint());
  // Create the two options for creating the shadow value.
  BasicBlock *ShadowLoadBB =
      BasicBlock::Create(Context, "", LoadBB->getParent(), NextBB);
  BasicBlock *FExtBB =
      BasicBlock::Create(Context, "", LoadBB->getParent(), NextBB);

  // Replace the newly created terminator unconditional branch by a conditional
  // branch to one of the options.
  {
    LoadBB->back().eraseFromParent();
    IRBuilder<> LoadBBBuilder(LoadBB); // The old builder has been invalidated.
    LoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
    // Null shadow pointer -> extend the loaded value; otherwise load the
    // shadow from shadow memory.
    LoadBBBuilder.CreateCondBr(LoadBBBuilder.CreateIsNull(ShadowPtr), FExtBB,
                               ShadowLoadBB);
  }

  // Fill in ShadowLoadBB.
  IRBuilder<> ShadowLoadBBBuilder(ShadowLoadBB);
  ShadowLoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
  // The shadow load uses alignment 1 and inherits the volatility of the
  // original load.
  Value *ShadowLoad = ShadowLoadBBBuilder.CreateAlignedLoad(
      ExtendedVT, ShadowPtr, Align(1), Load.isVolatile());
  if (ClCheckLoads) {
    // Optionally check the loaded value against the shadow that was just read.
    ShadowLoad = emitCheck(&Load, ShadowLoad, ShadowLoadBBBuilder,
                           CheckLoc::makeLoad(Load.getPointerOperand()));
  }
  ShadowLoadBBBuilder.CreateBr(NextBB);

  // Fill in FExtBB.
  IRBuilder<> FExtBBBuilder(FExtBB);
  FExtBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
  Value *FExt = FExtBBBuilder.CreateFPExt(&Load, ExtendedVT);
  FExtBBBuilder.CreateBr(NextBB);

  // The shadow value comes from either of the two options.
  IRBuilder<> NextBBBuilder(&*NextBB->begin());
  NextBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
  PHINode *ShadowPhi = NextBBBuilder.CreatePHI(ExtendedVT, 2);
  ShadowPhi->addIncoming(ShadowLoad, ShadowLoadBB);
  ShadowPhi->addIncoming(FExt, FExtBB);
  return ShadowPhi;
}
1195 
1196 Value *NumericalStabilitySanitizer::handleTrunc(const FPTruncInst &Trunc,
1197                                                 Type *VT, Type *ExtendedVT,
1198                                                 const ValueToShadowMap &Map,
1199                                                 IRBuilder<> &Builder) {
1200   Value *OrigSource = Trunc.getOperand(0);
1201   Type *OrigSourceTy = OrigSource->getType();
1202   Type *ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy);
1203 
1204   // When truncating:
1205   //  - (A) If the source has a shadow, we truncate from the shadow, else we
1206   //    truncate from the original source.
1207   //  - (B) If the shadow of the source is larger than the shadow of the dest,
1208   //    we still need a truncate. Else, the shadow of the source is the same
1209   //    type as the shadow of the dest (because mappings are non-decreasing), so
1210   //   we don't need to emit a truncate.
1211   // Examples,
1212   //   with a mapping of {f32->f64;f64->f80;f80->f128}
1213   //     fptrunc double   %1 to float     ->  fptrunc x86_fp80 s(%1) to double
1214   //     fptrunc x86_fp80 %1 to float     ->  fptrunc fp128    s(%1) to double
1215   //     fptrunc fp128    %1 to float     ->  fptrunc fp128    %1    to double
1216   //     fptrunc x86_fp80 %1 to double    ->  x86_fp80 s(%1)
1217   //     fptrunc fp128    %1 to double    ->  fptrunc fp128 %1 to x86_fp80
1218   //     fptrunc fp128    %1 to x86_fp80  ->  fp128 %1
1219   //   with a mapping of {f32->f64;f64->f128;f80->f128}
1220   //     fptrunc double   %1 to float     ->  fptrunc fp128    s(%1) to double
1221   //     fptrunc x86_fp80 %1 to float     ->  fptrunc fp128    s(%1) to double
1222   //     fptrunc fp128    %1 to float     ->  fptrunc fp128    %1    to double
1223   //     fptrunc x86_fp80 %1 to double    ->  fp128 %1
1224   //     fptrunc fp128    %1 to double    ->  fp128 %1
1225   //     fptrunc fp128    %1 to x86_fp80  ->  fp128 %1
1226   //   with a mapping of {f32->f32;f64->f32;f80->f64}
1227   //     fptrunc double   %1 to float     ->  float s(%1)
1228   //     fptrunc x86_fp80 %1 to float     ->  fptrunc double    s(%1) to float
1229   //     fptrunc fp128    %1 to float     ->  fptrunc fp128     %1    to float
1230   //     fptrunc x86_fp80 %1 to double    ->  fptrunc double    s(%1) to float
1231   //     fptrunc fp128    %1 to double    ->  fptrunc fp128     %1    to float
1232   //     fptrunc fp128    %1 to x86_fp80  ->  fptrunc fp128     %1    to double
1233 
1234   // See (A) above.
1235   Value *Source = ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource;
1236   Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
1237   // See (B) above.
1238   if (SourceTy == ExtendedVT)
1239     return Source;
1240 
1241   return Builder.CreateFPTrunc(Source, ExtendedVT);
1242 }
1243 
1244 Value *NumericalStabilitySanitizer::handleExt(const FPExtInst &Ext, Type *VT,
1245                                               Type *ExtendedVT,
1246                                               const ValueToShadowMap &Map,
1247                                               IRBuilder<> &Builder) {
1248   Value *OrigSource = Ext.getOperand(0);
1249   Type *OrigSourceTy = OrigSource->getType();
1250   Type *ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy);
1251   // When extending:
1252   //  - (A) If the source has a shadow, we extend from the shadow, else we
1253   //    extend from the original source.
1254   //  - (B) If the shadow of the dest is larger than the shadow of the source,
1255   //    we still need an extend. Else, the shadow of the source is the same
1256   //    type as the shadow of the dest (because mappings are non-decreasing), so
1257   //    we don't need to emit an extend.
1258   // Examples,
1259   //   with a mapping of {f32->f64;f64->f80;f80->f128}
1260   //     fpext half    %1 to float     ->  fpext half     %1    to double
1261   //     fpext half    %1 to double    ->  fpext half     %1    to x86_fp80
1262   //     fpext half    %1 to x86_fp80  ->  fpext half     %1    to fp128
1263   //     fpext float   %1 to double    ->  double s(%1)
1264   //     fpext float   %1 to x86_fp80  ->  fpext double   s(%1) to fp128
1265   //     fpext double  %1 to x86_fp80  ->  fpext x86_fp80 s(%1) to fp128
1266   //   with a mapping of {f32->f64;f64->f128;f80->f128}
1267   //     fpext half    %1 to float     ->  fpext half     %1    to double
1268   //     fpext half    %1 to double    ->  fpext half     %1    to fp128
1269   //     fpext half    %1 to x86_fp80  ->  fpext half     %1    to fp128
1270   //     fpext float   %1 to double    ->  fpext double   s(%1) to fp128
1271   //     fpext float   %1 to x86_fp80  ->  fpext double   s(%1) to fp128
1272   //     fpext double  %1 to x86_fp80  ->  fp128 s(%1)
1273   //   with a mapping of {f32->f32;f64->f32;f80->f64}
1274   //     fpext half    %1 to float     ->  fpext half     %1    to float
1275   //     fpext half    %1 to double    ->  fpext half     %1    to float
1276   //     fpext half    %1 to x86_fp80  ->  fpext half     %1    to double
1277   //     fpext float   %1 to double    ->  s(%1)
1278   //     fpext float   %1 to x86_fp80  ->  fpext float    s(%1) to double
1279   //     fpext double  %1 to x86_fp80  ->  fpext float    s(%1) to double
1280 
1281   // See (A) above.
1282   Value *Source = ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource;
1283   Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
1284   // See (B) above.
1285   if (SourceTy == ExtendedVT)
1286     return Source;
1287 
1288   return Builder.CreateFPExt(Source, ExtendedVT);
1289 }
1290 
1291 namespace {
1292 // TODO: This should be tablegen-ed.
1293 struct KnownIntrinsic {
1294   struct WidenedIntrinsic {
1295     const char *NarrowName;
1296     Intrinsic::ID ID; // wide id.
1297     using FnTypeFactory = FunctionType *(*)(LLVMContext &);
1298     FnTypeFactory MakeFnTy;
1299   };
1300 
1301   static const char *get(LibFunc LFunc);
1302 
1303   // Given an intrinsic with an `FT` argument, try to find a wider intrinsic
1304   // that applies the same operation on the shadow argument.
1305   // Options are:
1306   //  - pass in the ID and full function type,
1307   //  - pass in the name, which includes the function type through mangling.
1308   static const WidenedIntrinsic *widen(StringRef Name);
1309 
1310 private:
1311   struct LFEntry {
1312     LibFunc LFunc;
1313     const char *IntrinsicName;
1314   };
1315   static const LFEntry kLibfuncIntrinsics[];
1316 
1317   static const WidenedIntrinsic kWidenedIntrinsics[];
1318 };
1319 } // namespace
1320 
1321 static FunctionType *makeDoubleDouble(LLVMContext &C) {
1322   return FunctionType::get(Type::getDoubleTy(C), {Type::getDoubleTy(C)}, false);
1323 }
1324 
1325 static FunctionType *makeX86FP80X86FP80(LLVMContext &C) {
1326   return FunctionType::get(Type::getX86_FP80Ty(C), {Type::getX86_FP80Ty(C)},
1327                            false);
1328 }
1329 
1330 static FunctionType *makeDoubleDoubleI32(LLVMContext &C) {
1331   return FunctionType::get(Type::getDoubleTy(C),
1332                            {Type::getDoubleTy(C), Type::getInt32Ty(C)}, false);
1333 }
1334 
1335 static FunctionType *makeX86FP80X86FP80I32(LLVMContext &C) {
1336   return FunctionType::get(Type::getX86_FP80Ty(C),
1337                            {Type::getX86_FP80Ty(C), Type::getInt32Ty(C)},
1338                            false);
1339 }
1340 
1341 static FunctionType *makeDoubleDoubleDouble(LLVMContext &C) {
1342   return FunctionType::get(Type::getDoubleTy(C),
1343                            {Type::getDoubleTy(C), Type::getDoubleTy(C)}, false);
1344 }
1345 
1346 static FunctionType *makeX86FP80X86FP80X86FP80(LLVMContext &C) {
1347   return FunctionType::get(Type::getX86_FP80Ty(C),
1348                            {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
1349                            false);
1350 }
1351 
1352 static FunctionType *makeDoubleDoubleDoubleDouble(LLVMContext &C) {
1353   return FunctionType::get(
1354       Type::getDoubleTy(C),
1355       {Type::getDoubleTy(C), Type::getDoubleTy(C), Type::getDoubleTy(C)},
1356       false);
1357 }
1358 
1359 static FunctionType *makeX86FP80X86FP80X86FP80X86FP80(LLVMContext &C) {
1360   return FunctionType::get(
1361       Type::getX86_FP80Ty(C),
1362       {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
1363       false);
1364 }
1365 
1366 const KnownIntrinsic::WidenedIntrinsic KnownIntrinsic::kWidenedIntrinsics[] = {
1367     // TODO: Right now we ignore vector intrinsics.
1368     // This is hard because we have to model the semantics of the intrinsics,
1369     // e.g. llvm.x86.sse2.min.sd means extract first element, min, insert back.
1370     // Intrinsics that take any non-vector FT types:
1371     // NOTE: Right now because of
1372     // https://github.com/llvm/llvm-project/issues/44744
1373     // for f128 we need to use makeX86FP80X86FP80 (go to a lower precision and
1374     // come back).
1375     {"llvm.sqrt.f32", Intrinsic::sqrt, makeDoubleDouble},
1376     {"llvm.sqrt.f64", Intrinsic::sqrt, makeX86FP80X86FP80},
1377     {"llvm.sqrt.f80", Intrinsic::sqrt, makeX86FP80X86FP80},
1378     {"llvm.powi.f32", Intrinsic::powi, makeDoubleDoubleI32},
1379     {"llvm.powi.f64", Intrinsic::powi, makeX86FP80X86FP80I32},
1380     {"llvm.powi.f80", Intrinsic::powi, makeX86FP80X86FP80I32},
1381     {"llvm.sin.f32", Intrinsic::sin, makeDoubleDouble},
1382     {"llvm.sin.f64", Intrinsic::sin, makeX86FP80X86FP80},
1383     {"llvm.sin.f80", Intrinsic::sin, makeX86FP80X86FP80},
1384     {"llvm.cos.f32", Intrinsic::cos, makeDoubleDouble},
1385     {"llvm.cos.f64", Intrinsic::cos, makeX86FP80X86FP80},
1386     {"llvm.cos.f80", Intrinsic::cos, makeX86FP80X86FP80},
1387     {"llvm.pow.f32", Intrinsic::pow, makeDoubleDoubleDouble},
1388     {"llvm.pow.f64", Intrinsic::pow, makeX86FP80X86FP80X86FP80},
1389     {"llvm.pow.f80", Intrinsic::pow, makeX86FP80X86FP80X86FP80},
1390     {"llvm.exp.f32", Intrinsic::exp, makeDoubleDouble},
1391     {"llvm.exp.f64", Intrinsic::exp, makeX86FP80X86FP80},
1392     {"llvm.exp.f80", Intrinsic::exp, makeX86FP80X86FP80},
1393     {"llvm.exp2.f32", Intrinsic::exp2, makeDoubleDouble},
1394     {"llvm.exp2.f64", Intrinsic::exp2, makeX86FP80X86FP80},
1395     {"llvm.exp2.f80", Intrinsic::exp2, makeX86FP80X86FP80},
1396     {"llvm.log.f32", Intrinsic::log, makeDoubleDouble},
1397     {"llvm.log.f64", Intrinsic::log, makeX86FP80X86FP80},
1398     {"llvm.log.f80", Intrinsic::log, makeX86FP80X86FP80},
1399     {"llvm.log10.f32", Intrinsic::log10, makeDoubleDouble},
1400     {"llvm.log10.f64", Intrinsic::log10, makeX86FP80X86FP80},
1401     {"llvm.log10.f80", Intrinsic::log10, makeX86FP80X86FP80},
1402     {"llvm.log2.f32", Intrinsic::log2, makeDoubleDouble},
1403     {"llvm.log2.f64", Intrinsic::log2, makeX86FP80X86FP80},
1404     {"llvm.log2.f80", Intrinsic::log2, makeX86FP80X86FP80},
1405     {"llvm.fma.f32", Intrinsic::fma, makeDoubleDoubleDoubleDouble},
1406 
1407     {"llvm.fmuladd.f32", Intrinsic::fmuladd, makeDoubleDoubleDoubleDouble},
1408 
1409     {"llvm.fma.f64", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1410 
1411     {"llvm.fmuladd.f64", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1412 
1413     {"llvm.fma.f80", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1414     {"llvm.fabs.f32", Intrinsic::fabs, makeDoubleDouble},
1415     {"llvm.fabs.f64", Intrinsic::fabs, makeX86FP80X86FP80},
1416     {"llvm.fabs.f80", Intrinsic::fabs, makeX86FP80X86FP80},
1417     {"llvm.minnum.f32", Intrinsic::minnum, makeDoubleDoubleDouble},
1418     {"llvm.minnum.f64", Intrinsic::minnum, makeX86FP80X86FP80X86FP80},
1419     {"llvm.minnum.f80", Intrinsic::minnum, makeX86FP80X86FP80X86FP80},
1420     {"llvm.maxnum.f32", Intrinsic::maxnum, makeDoubleDoubleDouble},
1421     {"llvm.maxnum.f64", Intrinsic::maxnum, makeX86FP80X86FP80X86FP80},
1422     {"llvm.maxnum.f80", Intrinsic::maxnum, makeX86FP80X86FP80X86FP80},
1423     {"llvm.minimum.f32", Intrinsic::minimum, makeDoubleDoubleDouble},
1424     {"llvm.minimum.f64", Intrinsic::minimum, makeX86FP80X86FP80X86FP80},
1425     {"llvm.minimum.f80", Intrinsic::minimum, makeX86FP80X86FP80X86FP80},
1426     {"llvm.maximum.f32", Intrinsic::maximum, makeDoubleDoubleDouble},
1427     {"llvm.maximum.f64", Intrinsic::maximum, makeX86FP80X86FP80X86FP80},
1428     {"llvm.maximum.f80", Intrinsic::maximum, makeX86FP80X86FP80X86FP80},
1429     {"llvm.copysign.f32", Intrinsic::copysign, makeDoubleDoubleDouble},
1430     {"llvm.copysign.f64", Intrinsic::copysign, makeX86FP80X86FP80X86FP80},
1431     {"llvm.copysign.f80", Intrinsic::copysign, makeX86FP80X86FP80X86FP80},
1432     {"llvm.floor.f32", Intrinsic::floor, makeDoubleDouble},
1433     {"llvm.floor.f64", Intrinsic::floor, makeX86FP80X86FP80},
1434     {"llvm.floor.f80", Intrinsic::floor, makeX86FP80X86FP80},
1435     {"llvm.ceil.f32", Intrinsic::ceil, makeDoubleDouble},
1436     {"llvm.ceil.f64", Intrinsic::ceil, makeX86FP80X86FP80},
1437     {"llvm.ceil.f80", Intrinsic::ceil, makeX86FP80X86FP80},
1438     {"llvm.trunc.f32", Intrinsic::trunc, makeDoubleDouble},
1439     {"llvm.trunc.f64", Intrinsic::trunc, makeX86FP80X86FP80},
1440     {"llvm.trunc.f80", Intrinsic::trunc, makeX86FP80X86FP80},
1441     {"llvm.rint.f32", Intrinsic::rint, makeDoubleDouble},
1442     {"llvm.rint.f64", Intrinsic::rint, makeX86FP80X86FP80},
1443     {"llvm.rint.f80", Intrinsic::rint, makeX86FP80X86FP80},
1444     {"llvm.nearbyint.f32", Intrinsic::nearbyint, makeDoubleDouble},
1445     {"llvm.nearbyint.f64", Intrinsic::nearbyint, makeX86FP80X86FP80},
1446     {"llvm.nearbyin80f64", Intrinsic::nearbyint, makeX86FP80X86FP80},
1447     {"llvm.round.f32", Intrinsic::round, makeDoubleDouble},
1448     {"llvm.round.f64", Intrinsic::round, makeX86FP80X86FP80},
1449     {"llvm.round.f80", Intrinsic::round, makeX86FP80X86FP80},
1450     {"llvm.lround.f32", Intrinsic::lround, makeDoubleDouble},
1451     {"llvm.lround.f64", Intrinsic::lround, makeX86FP80X86FP80},
1452     {"llvm.lround.f80", Intrinsic::lround, makeX86FP80X86FP80},
1453     {"llvm.llround.f32", Intrinsic::llround, makeDoubleDouble},
1454     {"llvm.llround.f64", Intrinsic::llround, makeX86FP80X86FP80},
1455     {"llvm.llround.f80", Intrinsic::llround, makeX86FP80X86FP80},
1456     {"llvm.lrint.f32", Intrinsic::lrint, makeDoubleDouble},
1457     {"llvm.lrint.f64", Intrinsic::lrint, makeX86FP80X86FP80},
1458     {"llvm.lrint.f80", Intrinsic::lrint, makeX86FP80X86FP80},
1459     {"llvm.llrint.f32", Intrinsic::llrint, makeDoubleDouble},
1460     {"llvm.llrint.f64", Intrinsic::llrint, makeX86FP80X86FP80},
1461     {"llvm.llrint.f80", Intrinsic::llrint, makeX86FP80X86FP80},
1462 };
1463 
// Maps math library functions (identified by their LibFunc id) to the name of
// the LLVM intrinsic that implements the same operation. This table is scanned
// linearly by KnownIntrinsic::get().
//
// Naming convention visible in the rows below: the `f`-suffixed libm function
// maps to the `.f32` intrinsic, the unsuffixed one to `.f64`, and the
// `l`-suffixed one to `.f80`.
//
// Every IntrinsicName listed here is later passed to KnownIntrinsic::widen()
// by maybeHandleKnownCallBase(), so it must match a NarrowName key of
// kWidenedIntrinsics exactly.
// NOTE(review): the visible part of kWidenedIntrinsics spells its 80-bit
// nearbyint key "llvm.nearbyin80f64", so the LibFunc_nearbyintl row below
// appears to have no matching widened entry -- confirm and fix that table.
const KnownIntrinsic::LFEntry KnownIntrinsic::kLibfuncIntrinsics[] = {
    {LibFunc_sqrtf, "llvm.sqrt.f32"},
    {LibFunc_sqrt, "llvm.sqrt.f64"},
    {LibFunc_sqrtl, "llvm.sqrt.f80"},
    {LibFunc_sinf, "llvm.sin.f32"},
    {LibFunc_sin, "llvm.sin.f64"},
    {LibFunc_sinl, "llvm.sin.f80"},
    {LibFunc_cosf, "llvm.cos.f32"},
    {LibFunc_cos, "llvm.cos.f64"},
    {LibFunc_cosl, "llvm.cos.f80"},
    {LibFunc_powf, "llvm.pow.f32"},
    {LibFunc_pow, "llvm.pow.f64"},
    {LibFunc_powl, "llvm.pow.f80"},
    {LibFunc_expf, "llvm.exp.f32"},
    {LibFunc_exp, "llvm.exp.f64"},
    {LibFunc_expl, "llvm.exp.f80"},
    {LibFunc_exp2f, "llvm.exp2.f32"},
    {LibFunc_exp2, "llvm.exp2.f64"},
    {LibFunc_exp2l, "llvm.exp2.f80"},
    {LibFunc_logf, "llvm.log.f32"},
    {LibFunc_log, "llvm.log.f64"},
    {LibFunc_logl, "llvm.log.f80"},
    {LibFunc_log10f, "llvm.log10.f32"},
    {LibFunc_log10, "llvm.log10.f64"},
    {LibFunc_log10l, "llvm.log10.f80"},
    {LibFunc_log2f, "llvm.log2.f32"},
    {LibFunc_log2, "llvm.log2.f64"},
    {LibFunc_log2l, "llvm.log2.f80"},
    {LibFunc_fabsf, "llvm.fabs.f32"},
    {LibFunc_fabs, "llvm.fabs.f64"},
    {LibFunc_fabsl, "llvm.fabs.f80"},
    {LibFunc_copysignf, "llvm.copysign.f32"},
    {LibFunc_copysign, "llvm.copysign.f64"},
    {LibFunc_copysignl, "llvm.copysign.f80"},
    {LibFunc_floorf, "llvm.floor.f32"},
    {LibFunc_floor, "llvm.floor.f64"},
    {LibFunc_floorl, "llvm.floor.f80"},
    // fmax/fmin are expressed with the maxnum/minnum intrinsics.
    {LibFunc_fmaxf, "llvm.maxnum.f32"},
    {LibFunc_fmax, "llvm.maxnum.f64"},
    {LibFunc_fmaxl, "llvm.maxnum.f80"},
    {LibFunc_fminf, "llvm.minnum.f32"},
    {LibFunc_fmin, "llvm.minnum.f64"},
    {LibFunc_fminl, "llvm.minnum.f80"},
    {LibFunc_ceilf, "llvm.ceil.f32"},
    {LibFunc_ceil, "llvm.ceil.f64"},
    {LibFunc_ceill, "llvm.ceil.f80"},
    {LibFunc_truncf, "llvm.trunc.f32"},
    {LibFunc_trunc, "llvm.trunc.f64"},
    {LibFunc_truncl, "llvm.trunc.f80"},
    {LibFunc_rintf, "llvm.rint.f32"},
    {LibFunc_rint, "llvm.rint.f64"},
    {LibFunc_rintl, "llvm.rint.f80"},
    {LibFunc_nearbyintf, "llvm.nearbyint.f32"},
    {LibFunc_nearbyint, "llvm.nearbyint.f64"},
    {LibFunc_nearbyintl, "llvm.nearbyint.f80"},
    {LibFunc_roundf, "llvm.round.f32"},
    {LibFunc_round, "llvm.round.f64"},
    {LibFunc_roundl, "llvm.round.f80"},
};
1523 
1524 const char *KnownIntrinsic::get(LibFunc LFunc) {
1525   for (const auto &E : kLibfuncIntrinsics) {
1526     if (E.LFunc == LFunc)
1527       return E.IntrinsicName;
1528   }
1529   return nullptr;
1530 }
1531 
1532 const KnownIntrinsic::WidenedIntrinsic *KnownIntrinsic::widen(StringRef Name) {
1533   for (const auto &E : kWidenedIntrinsics) {
1534     if (E.NarrowName == Name)
1535       return &E;
1536   }
1537   return nullptr;
1538 }
1539 
1540 // Returns the name of the LLVM intrinsic corresponding to the given function.
1541 static const char *getIntrinsicFromLibfunc(Function &Fn, Type *VT,
1542                                            const TargetLibraryInfo &TLI) {
1543   LibFunc LFunc;
1544   if (!TLI.getLibFunc(Fn, LFunc))
1545     return nullptr;
1546 
1547   if (const char *Name = KnownIntrinsic::get(LFunc))
1548     return Name;
1549 
1550   LLVM_DEBUG(errs() << "TODO: LibFunc: " << TLI.getName(LFunc) << "\n");
1551   return nullptr;
1552 }
1553 
1554 // Try to handle a known function call.
1555 Value *NumericalStabilitySanitizer::maybeHandleKnownCallBase(
1556     CallBase &Call, Type *VT, Type *ExtendedVT, const TargetLibraryInfo &TLI,
1557     const ValueToShadowMap &Map, IRBuilder<> &Builder) {
1558   Function *Fn = Call.getCalledFunction();
1559   if (Fn == nullptr)
1560     return nullptr;
1561 
1562   Intrinsic::ID WidenedId = Intrinsic::ID();
1563   FunctionType *WidenedFnTy = nullptr;
1564   if (const auto ID = Fn->getIntrinsicID()) {
1565     const auto *Widened = KnownIntrinsic::widen(Fn->getName());
1566     if (Widened) {
1567       WidenedId = Widened->ID;
1568       WidenedFnTy = Widened->MakeFnTy(Context);
1569     } else {
1570       // If we don't know how to widen the intrinsic, we have no choice but to
1571       // call the non-wide version on a truncated shadow and extend again
1572       // afterwards.
1573       WidenedId = ID;
1574       WidenedFnTy = Fn->getFunctionType();
1575     }
1576   } else if (const char *Name = getIntrinsicFromLibfunc(*Fn, VT, TLI)) {
1577     // We might have a call to a library function that we can replace with a
1578     // wider Intrinsic.
1579     const auto *Widened = KnownIntrinsic::widen(Name);
1580     assert(Widened && "make sure KnownIntrinsic entries are consistent");
1581     WidenedId = Widened->ID;
1582     WidenedFnTy = Widened->MakeFnTy(Context);
1583   } else {
1584     // This is not a known library function or intrinsic.
1585     return nullptr;
1586   }
1587 
1588   // Check that the widened intrinsic is valid.
1589   SmallVector<Intrinsic::IITDescriptor, 8> Table;
1590   getIntrinsicInfoTableEntries(WidenedId, Table);
1591   SmallVector<Type *, 4> ArgTys;
1592   ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
1593   [[maybe_unused]] Intrinsic::MatchIntrinsicTypesResult MatchResult =
1594       Intrinsic::matchIntrinsicSignature(WidenedFnTy, TableRef, ArgTys);
1595   assert(MatchResult == Intrinsic::MatchIntrinsicTypes_Match &&
1596          "invalid widened intrinsic");
1597   // For known intrinsic functions, we create a second call to the same
1598   // intrinsic with a different type.
1599   SmallVector<Value *, 4> Args;
1600   // The last operand is the intrinsic itself, skip it.
1601   for (unsigned I = 0, E = Call.getNumOperands() - 1; I < E; ++I) {
1602     Value *Arg = Call.getOperand(I);
1603     Type *OrigArgTy = Arg->getType();
1604     Type *IntrinsicArgTy = WidenedFnTy->getParamType(I);
1605     if (OrigArgTy == IntrinsicArgTy) {
1606       Args.push_back(Arg); // The arg is passed as is.
1607       continue;
1608     }
1609     Type *ShadowArgTy = Config.getExtendedFPType(Arg->getType());
1610     assert(ShadowArgTy &&
1611            "don't know how to get the shadow value for a non-FT");
1612     Value *Shadow = Map.getShadow(Arg);
1613     if (ShadowArgTy == IntrinsicArgTy) {
1614       // The shadow is the right type for the intrinsic.
1615       assert(Shadow->getType() == ShadowArgTy);
1616       Args.push_back(Shadow);
1617       continue;
1618     }
1619     // There is no intrinsic with his level of precision, truncate the shadow.
1620     Args.push_back(Builder.CreateFPTrunc(Shadow, IntrinsicArgTy));
1621   }
1622   Value *IntrinsicCall = Builder.CreateIntrinsic(WidenedId, ArgTys, Args);
1623   return WidenedFnTy->getReturnType() == ExtendedVT
1624              ? IntrinsicCall
1625              : Builder.CreateFPExt(IntrinsicCall, ExtendedVT);
1626 }
1627 
1628 // Handle a CallBase, i.e. a function call, an inline asm sequence, or an
1629 // invoke.
1630 Value *NumericalStabilitySanitizer::handleCallBase(CallBase &Call, Type *VT,
1631                                                    Type *ExtendedVT,
1632                                                    const TargetLibraryInfo &TLI,
1633                                                    const ValueToShadowMap &Map,
1634                                                    IRBuilder<> &Builder) {
1635   // We cannot look inside inline asm, just expand the result again.
1636   if (Call.isInlineAsm())
1637     return Builder.CreateFPExt(&Call, ExtendedVT);
1638 
1639   // Intrinsics and library functions (e.g. sin, exp) are handled
1640   // specifically, because we know their semantics and can do better than
1641   // blindly calling them (e.g. compute the sinus in the actual shadow domain).
1642   if (Value *V =
1643           maybeHandleKnownCallBase(Call, VT, ExtendedVT, TLI, Map, Builder))
1644     return V;
1645 
1646   // If the return tag matches that of the called function, read the extended
1647   // return value from the shadow ret ptr. Else, just extend the return value.
1648   Value *L =
1649       Builder.CreateLoad(IntptrTy, NsanShadowRetTag, /*isVolatile=*/false);
1650   Value *HasShadowRet = Builder.CreateICmpEQ(
1651       L, Builder.CreatePtrToInt(Call.getCalledOperand(), IntptrTy));
1652 
1653   Value *ShadowRetVal = Builder.CreateLoad(
1654       ExtendedVT,
1655       Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0),
1656       /*isVolatile=*/false);
1657   Value *Shadow = Builder.CreateSelect(HasShadowRet, ShadowRetVal,
1658                                        Builder.CreateFPExt(&Call, ExtendedVT));
1659   ++NumInstrumentedFTCalls;
1660   return Shadow;
1661 }
1662 
1663 // Creates a shadow value for the given FT value. At that point all operands are
1664 // guaranteed to be available.
1665 Value *NumericalStabilitySanitizer::createShadowValueWithOperandsAvailable(
1666     Instruction &Inst, const TargetLibraryInfo &TLI,
1667     const ValueToShadowMap &Map) {
1668   Type *VT = Inst.getType();
1669   Type *ExtendedVT = Config.getExtendedFPType(VT);
1670   assert(ExtendedVT != nullptr && "trying to create a shadow for a non-FT");
1671 
1672   if (auto *Load = dyn_cast<LoadInst>(&Inst))
1673     return handleLoad(*Load, VT, ExtendedVT);
1674 
1675   if (auto *Call = dyn_cast<CallInst>(&Inst)) {
1676     // Insert after the call.
1677     BasicBlock::iterator It(Inst);
1678     IRBuilder<> Builder(Call->getParent(), ++It);
1679     Builder.SetCurrentDebugLocation(Call->getDebugLoc());
1680     return handleCallBase(*Call, VT, ExtendedVT, TLI, Map, Builder);
1681   }
1682 
1683   if (auto *Invoke = dyn_cast<InvokeInst>(&Inst)) {
1684     // The Invoke terminates the basic block, create a new basic block in
1685     // between the successful invoke and the next block.
1686     BasicBlock *InvokeBB = Invoke->getParent();
1687     BasicBlock *NextBB = Invoke->getNormalDest();
1688     BasicBlock *NewBB =
1689         BasicBlock::Create(Context, "", NextBB->getParent(), NextBB);
1690     Inst.replaceSuccessorWith(NextBB, NewBB);
1691 
1692     IRBuilder<> Builder(NewBB);
1693     Builder.SetCurrentDebugLocation(Invoke->getDebugLoc());
1694     Value *Shadow = handleCallBase(*Invoke, VT, ExtendedVT, TLI, Map, Builder);
1695     Builder.CreateBr(NextBB);
1696     NewBB->replaceSuccessorsPhiUsesWith(InvokeBB, NewBB);
1697     return Shadow;
1698   }
1699 
1700   IRBuilder<> Builder(Inst.getNextNode());
1701   Builder.SetCurrentDebugLocation(Inst.getDebugLoc());
1702 
1703   if (auto *Trunc = dyn_cast<FPTruncInst>(&Inst))
1704     return handleTrunc(*Trunc, VT, ExtendedVT, Map, Builder);
1705   if (auto *Ext = dyn_cast<FPExtInst>(&Inst))
1706     return handleExt(*Ext, VT, ExtendedVT, Map, Builder);
1707 
1708   if (auto *UnaryOp = dyn_cast<UnaryOperator>(&Inst))
1709     return Builder.CreateUnOp(UnaryOp->getOpcode(),
1710                               Map.getShadow(UnaryOp->getOperand(0)));
1711 
1712   if (auto *BinOp = dyn_cast<BinaryOperator>(&Inst))
1713     return Builder.CreateBinOp(BinOp->getOpcode(),
1714                                Map.getShadow(BinOp->getOperand(0)),
1715                                Map.getShadow(BinOp->getOperand(1)));
1716 
1717   if (isa<UIToFPInst>(&Inst) || isa<SIToFPInst>(&Inst)) {
1718     auto *Cast = cast<CastInst>(&Inst);
1719     return Builder.CreateCast(Cast->getOpcode(), Cast->getOperand(0),
1720                               ExtendedVT);
1721   }
1722 
1723   if (auto *S = dyn_cast<SelectInst>(&Inst))
1724     return Builder.CreateSelect(S->getCondition(),
1725                                 Map.getShadow(S->getTrueValue()),
1726                                 Map.getShadow(S->getFalseValue()));
1727 
1728   if (auto *Freeze = dyn_cast<FreezeInst>(&Inst))
1729     return Builder.CreateFreeze(Map.getShadow(Freeze->getOperand(0)));
1730 
1731   if (auto *Extract = dyn_cast<ExtractElementInst>(&Inst))
1732     return Builder.CreateExtractElement(
1733         Map.getShadow(Extract->getVectorOperand()), Extract->getIndexOperand());
1734 
1735   if (auto *Insert = dyn_cast<InsertElementInst>(&Inst))
1736     return Builder.CreateInsertElement(Map.getShadow(Insert->getOperand(0)),
1737                                        Map.getShadow(Insert->getOperand(1)),
1738                                        Insert->getOperand(2));
1739 
1740   if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(&Inst))
1741     return Builder.CreateShuffleVector(Map.getShadow(Shuffle->getOperand(0)),
1742                                        Map.getShadow(Shuffle->getOperand(1)),
1743                                        Shuffle->getShuffleMask());
1744   // TODO: We could make aggregate object first class citizens. For now we
1745   // just extend the extracted value.
1746   if (auto *Extract = dyn_cast<ExtractValueInst>(&Inst))
1747     return Builder.CreateFPExt(Extract, ExtendedVT);
1748 
1749   if (auto *BC = dyn_cast<BitCastInst>(&Inst))
1750     return Builder.CreateFPExt(BC, ExtendedVT);
1751 
1752   report_fatal_error("Unimplemented support for " +
1753                      Twine(Inst.getOpcodeName()));
1754 }
1755 
1756 // Creates a shadow value for an instruction that defines a value of FT type.
1757 // FT operands that do not already have shadow values are created recursively.
1758 // The DFS is guaranteed to not loop as phis and arguments already have
1759 // shadows.
1760 void NumericalStabilitySanitizer::maybeCreateShadowValue(
1761     Instruction &Root, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
1762   Type *VT = Root.getType();
1763   Type *ExtendedVT = Config.getExtendedFPType(VT);
1764   if (ExtendedVT == nullptr)
1765     return; // Not an FT value.
1766 
1767   if (Map.hasShadow(&Root))
1768     return; // Shadow already exists.
1769 
1770   assert(!isa<PHINode>(Root) && "phi nodes should already have shadows");
1771 
1772   std::vector<Instruction *> DfsStack(1, &Root);
1773   while (!DfsStack.empty()) {
1774     // Ensure that all operands to the instruction have shadows before
1775     // proceeding.
1776     Instruction *I = DfsStack.back();
1777     // The shadow for the instruction might have been created deeper in the DFS,
1778     // see `forward_use_with_two_uses` test.
1779     if (Map.hasShadow(I)) {
1780       DfsStack.pop_back();
1781       continue;
1782     }
1783 
1784     bool MissingShadow = false;
1785     for (Value *Op : I->operands()) {
1786       Type *VT = Op->getType();
1787       if (!Config.getExtendedFPType(VT))
1788         continue; // Not an FT value.
1789       if (Map.hasShadow(Op))
1790         continue; // Shadow is already available.
1791       MissingShadow = true;
1792       DfsStack.push_back(cast<Instruction>(Op));
1793     }
1794     if (MissingShadow)
1795       continue; // Process operands and come back to this instruction later.
1796 
1797     // All operands have shadows. Create a shadow for the current value.
1798     Value *Shadow = createShadowValueWithOperandsAvailable(*I, TLI, Map);
1799     Map.setShadow(*I, *Shadow);
1800     DfsStack.pop_back();
1801   }
1802 }
1803 
1804 // A floating-point store needs its value and type written to shadow memory.
1805 void NumericalStabilitySanitizer::propagateFTStore(
1806     StoreInst &Store, Type *VT, Type *ExtendedVT, const ValueToShadowMap &Map) {
1807   Value *StoredValue = Store.getValueOperand();
1808   IRBuilder<> Builder(&Store);
1809   Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1810   const auto Extents = getMemoryExtentsOrDie(VT);
1811   Value *ShadowPtr = Builder.CreateCall(
1812       NsanGetShadowPtrForStore[Extents.ValueType],
1813       {Store.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
1814 
1815   Value *StoredShadow = Map.getShadow(StoredValue);
1816   if (!Store.getParent()->getParent()->hasOptNone()) {
1817     // Only check stores when optimizing, because non-optimized code generates
1818     // too many stores to the stack, creating false positives.
1819     if (ClCheckStores) {
1820       StoredShadow = emitCheck(StoredValue, StoredShadow, Builder,
1821                                CheckLoc::makeStore(Store.getPointerOperand()));
1822       ++NumInstrumentedFTStores;
1823     }
1824   }
1825 
1826   Builder.CreateAlignedStore(StoredShadow, ShadowPtr, Align(1),
1827                              Store.isVolatile());
1828 }
1829 
1830 // A non-ft store needs to invalidate shadow memory. Exceptions are:
1831 //   - memory transfers of floating-point data through other pointer types (llvm
1832 //     optimization passes transform `*(float*)a = *(float*)b` into
1833 //     `*(i32*)a = *(i32*)b` ). These have the same semantics as memcpy.
1834 //   - Writes of FT-sized constants. LLVM likes to do float stores as bitcasted
1835 //     ints. Note that this is not really necessary because if the value is
1836 //     unknown the framework will re-extend it on load anyway. It just felt
1837 //     easier to debug tests with vectors of FTs.
1838 void NumericalStabilitySanitizer::propagateNonFTStore(
1839     StoreInst &Store, Type *VT, const ValueToShadowMap &Map) {
1840   Value *PtrOp = Store.getPointerOperand();
1841   IRBuilder<> Builder(Store.getNextNode());
1842   Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1843   Value *Dst = PtrOp;
1844   TypeSize SlotSize = DL.getTypeStoreSize(VT);
1845   assert(!SlotSize.isScalable() && "unsupported");
1846   const auto LoadSizeBytes = SlotSize.getFixedValue();
1847   Value *ValueSize = Constant::getIntegerValue(
1848       IntptrTy, APInt(IntptrTy->getPrimitiveSizeInBits(), LoadSizeBytes));
1849 
1850   ++NumInstrumentedNonFTStores;
1851   Value *StoredValue = Store.getValueOperand();
1852   if (LoadInst *Load = dyn_cast<LoadInst>(StoredValue)) {
1853     // TODO: Handle the case when the value is from a phi.
1854     // This is a memory transfer with memcpy semantics. Copy the type and
1855     // value from the source. Note that we cannot use __nsan_copy_values()
1856     // here, because that will not work when there is a write to memory in
1857     // between the load and the store, e.g. in the case of a swap.
1858     Type *ShadowTypeIntTy = Type::getIntNTy(Context, 8 * LoadSizeBytes);
1859     Type *ShadowValueIntTy =
1860         Type::getIntNTy(Context, 8 * kShadowScale * LoadSizeBytes);
1861     IRBuilder<> LoadBuilder(Load->getNextNode());
1862     Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1863     Value *LoadSrc = Load->getPointerOperand();
1864     // Read the shadow type and value at load time. The type has the same size
1865     // as the FT value, the value has twice its size.
1866     // TODO: cache them to avoid re-creating them when a load is used by
1867     // several stores. Maybe create them like the FT shadows when a load is
1868     // encountered.
1869     Value *RawShadowType = LoadBuilder.CreateAlignedLoad(
1870         ShadowTypeIntTy,
1871         LoadBuilder.CreateCall(NsanGetRawShadowTypePtr, {LoadSrc}), Align(1),
1872         /*isVolatile=*/false);
1873     Value *RawShadowValue = LoadBuilder.CreateAlignedLoad(
1874         ShadowValueIntTy,
1875         LoadBuilder.CreateCall(NsanGetRawShadowPtr, {LoadSrc}), Align(1),
1876         /*isVolatile=*/false);
1877 
1878     // Write back the shadow type and value at store time.
1879     Builder.CreateAlignedStore(
1880         RawShadowType, Builder.CreateCall(NsanGetRawShadowTypePtr, {Dst}),
1881         Align(1),
1882         /*isVolatile=*/false);
1883     Builder.CreateAlignedStore(RawShadowValue,
1884                                Builder.CreateCall(NsanGetRawShadowPtr, {Dst}),
1885                                Align(1),
1886                                /*isVolatile=*/false);
1887 
1888     ++NumInstrumentedNonFTMemcpyStores;
1889     return;
1890   }
1891   // ClPropagateNonFTConstStoresAsFT is by default false.
1892   if (Constant *C; ClPropagateNonFTConstStoresAsFT &&
1893                    (C = dyn_cast<Constant>(StoredValue))) {
1894     // This might be a fp constant stored as an int. Bitcast and store if it has
1895     // appropriate size.
1896     Type *BitcastTy = nullptr; // The FT type to bitcast to.
1897     if (auto *CInt = dyn_cast<ConstantInt>(C)) {
1898       switch (CInt->getType()->getScalarSizeInBits()) {
1899       case 32:
1900         BitcastTy = Type::getFloatTy(Context);
1901         break;
1902       case 64:
1903         BitcastTy = Type::getDoubleTy(Context);
1904         break;
1905       case 80:
1906         BitcastTy = Type::getX86_FP80Ty(Context);
1907         break;
1908       default:
1909         break;
1910       }
1911     } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
1912       const int NumElements =
1913           cast<VectorType>(CDV->getType())->getElementCount().getFixedValue();
1914       switch (CDV->getType()->getScalarSizeInBits()) {
1915       case 32:
1916         BitcastTy =
1917             VectorType::get(Type::getFloatTy(Context), NumElements, false);
1918         break;
1919       case 64:
1920         BitcastTy =
1921             VectorType::get(Type::getDoubleTy(Context), NumElements, false);
1922         break;
1923       case 80:
1924         BitcastTy =
1925             VectorType::get(Type::getX86_FP80Ty(Context), NumElements, false);
1926         break;
1927       default:
1928         break;
1929       }
1930     }
1931     if (BitcastTy) {
1932       const MemoryExtents Extents = getMemoryExtentsOrDie(BitcastTy);
1933       Value *ShadowPtr = Builder.CreateCall(
1934           NsanGetShadowPtrForStore[Extents.ValueType],
1935           {PtrOp, ConstantInt::get(IntptrTy, Extents.NumElts)});
1936       // Bitcast the integer value to the appropriate FT type and extend to 2FT.
1937       Type *ExtVT = Config.getExtendedFPType(BitcastTy);
1938       Value *Shadow =
1939           Builder.CreateFPExt(Builder.CreateBitCast(C, BitcastTy), ExtVT);
1940       Builder.CreateAlignedStore(Shadow, ShadowPtr, Align(1),
1941                                  Store.isVolatile());
1942       return;
1943     }
1944   }
1945   // All other stores just reset the shadow value to unknown.
1946   Builder.CreateCall(NsanSetUnknownFns.getFallback(), {Dst, ValueSize});
1947 }
1948 
1949 void NumericalStabilitySanitizer::propagateShadowValues(
1950     Instruction &Inst, const TargetLibraryInfo &TLI,
1951     const ValueToShadowMap &Map) {
1952   if (auto *Store = dyn_cast<StoreInst>(&Inst)) {
1953     Value *StoredValue = Store->getValueOperand();
1954     Type *VT = StoredValue->getType();
1955     Type *ExtendedVT = Config.getExtendedFPType(VT);
1956     if (ExtendedVT == nullptr)
1957       return propagateNonFTStore(*Store, VT, Map);
1958     return propagateFTStore(*Store, VT, ExtendedVT, Map);
1959   }
1960 
1961   if (auto *FCmp = dyn_cast<FCmpInst>(&Inst)) {
1962     emitFCmpCheck(*FCmp, Map);
1963     return;
1964   }
1965 
1966   if (auto *CB = dyn_cast<CallBase>(&Inst)) {
1967     maybeAddSuffixForNsanInterface(CB);
1968     if (CallInst *CI = dyn_cast<CallInst>(&Inst))
1969       maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
1970     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) {
1971       instrumentMemIntrinsic(MI);
1972       return;
1973     }
1974     populateShadowStack(*CB, TLI, Map);
1975     return;
1976   }
1977 
1978   if (auto *RetInst = dyn_cast<ReturnInst>(&Inst)) {
1979     if (!ClCheckRet)
1980       return;
1981 
1982     Value *RV = RetInst->getReturnValue();
1983     if (RV == nullptr)
1984       return; // This is a `ret void`.
1985     Type *VT = RV->getType();
1986     Type *ExtendedVT = Config.getExtendedFPType(VT);
1987     if (ExtendedVT == nullptr)
1988       return; // Not an FT ret.
1989     Value *RVShadow = Map.getShadow(RV);
1990     IRBuilder<> Builder(RetInst);
1991 
1992     RVShadow = emitCheck(RV, RVShadow, Builder, CheckLoc::makeRet());
1993     ++NumInstrumentedFTRets;
1994     // Store tag.
1995     Value *FnAddr =
1996         Builder.CreatePtrToInt(Inst.getParent()->getParent(), IntptrTy);
1997     Builder.CreateStore(FnAddr, NsanShadowRetTag);
1998     // Store value.
1999     Value *ShadowRetValPtr =
2000         Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0);
2001     Builder.CreateStore(RVShadow, ShadowRetValPtr);
2002     return;
2003   }
2004 
2005   if (InsertValueInst *Insert = dyn_cast<InsertValueInst>(&Inst)) {
2006     Value *V = Insert->getOperand(1);
2007     Type *VT = V->getType();
2008     Type *ExtendedVT = Config.getExtendedFPType(VT);
2009     if (ExtendedVT == nullptr)
2010       return;
2011     IRBuilder<> Builder(Insert);
2012     emitCheck(V, Map.getShadow(V), Builder, CheckLoc::makeInsert());
2013     return;
2014   }
2015 }
2016 
2017 // Moves fast math flags from the function to individual instructions, and
2018 // removes the attribute from the function.
2019 // TODO: Make this controllable with a flag.
2020 static void moveFastMathFlags(Function &F,
2021                               std::vector<Instruction *> &Instructions) {
2022   FastMathFlags FMF;
2023 #define MOVE_FLAG(attr, setter)                                                \
2024   if (F.getFnAttribute(attr).getValueAsString() == "true") {                   \
2025     F.removeFnAttr(attr);                                                      \
2026     FMF.set##setter();                                                         \
2027   }
2028   MOVE_FLAG("unsafe-fp-math", Fast)
2029   MOVE_FLAG("no-infs-fp-math", NoInfs)
2030   MOVE_FLAG("no-nans-fp-math", NoNaNs)
2031   MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
2032 #undef MOVE_FLAG
2033 
2034   for (Instruction *I : Instructions)
2035     if (isa<FPMathOperator>(I))
2036       I->setFastMathFlags(FMF);
2037 }
2038 
2039 bool NumericalStabilitySanitizer::sanitizeFunction(
2040     Function &F, const TargetLibraryInfo &TLI) {
2041   if (!F.hasFnAttribute(Attribute::SanitizeNumericalStability) ||
2042       F.isDeclaration())
2043     return false;
2044 
2045   // This is required to prevent instrumenting call to __nsan_init from within
2046   // the module constructor.
2047   if (F.getName() == kNsanModuleCtorName)
2048     return false;
2049   SmallVector<Instruction *, 8> AllLoadsAndStores;
2050   SmallVector<Instruction *, 8> LocalLoadsAndStores;
2051 
2052   // The instrumentation maintains:
2053   //  - for each IR value `v` of floating-point (or vector floating-point) type
2054   //    FT, a shadow IR value `s(v)` with twice the precision 2FT (e.g.
2055   //    double for float and f128 for double).
2056   //  - A shadow memory, which stores `s(v)` for any `v` that has been stored,
2057   //    along with a shadow memory tag, which stores whether the value in the
2058   //    corresponding shadow memory is valid. Note that this might be
2059   //    incorrect if a non-instrumented function stores to memory, or if
2060   //    memory is stored to through a char pointer.
2061   //  - A shadow stack, which holds `s(v)` for any floating-point argument `v`
2062   //    of a call to an instrumented function. This allows
2063   //    instrumented functions to retrieve the shadow values for their
2064   //    arguments.
2065   //    Because instrumented functions can be called from non-instrumented
2066   //    functions, the stack needs to include a tag so that the instrumented
2067   //    function knows whether shadow values are available for their
  //    parameters (i.e. whether it was called by an instrumented function).
2069   //    When shadow arguments are not available, they have to be recreated by
2070   //    extending the precision of the non-shadow arguments to the non-shadow
2071   //    value. Non-instrumented functions do not modify (or even know about) the
2072   //    shadow stack. The shadow stack pointer is __nsan_shadow_args. The shadow
2073   //    stack tag is __nsan_shadow_args_tag. The tag is any unique identifier
2074   //    for the function (we use the address of the function). Both variables
2075   //    are thread local.
2076   //    Example:
2077   //     calls                             shadow stack tag      shadow stack
2078   //     =======================================================================
2079   //     non_instrumented_1()              0                     0
2080   //             |
2081   //             v
2082   //     instrumented_2(float a)           0                     0
2083   //             |
2084   //             v
2085   //     instrumented_3(float b, double c) &instrumented_3       s(b),s(c)
2086   //             |
2087   //             v
2088   //     instrumented_4(float d)           &instrumented_4       s(d)
2089   //             |
2090   //             v
2091   //     non_instrumented_5(float e)       &non_instrumented_5   s(e)
2092   //             |
2093   //             v
2094   //     instrumented_6(float f)           &non_instrumented_5   s(e)
2095   //
2096   //   On entry, instrumented_2 checks whether the tag corresponds to its
2097   //   function ptr.
2098   //   Note that functions reset the tag to 0 after reading shadow parameters.
2099   //   This ensures that the function does not erroneously read invalid data if
2100   //   called twice in the same stack, once from an instrumented function and
2101   //   once from an uninstrumented one. For example, in the following example,
2102   //   resetting the tag in (A) ensures that (B) does not reuse the same
2103   //   shadow arguments (which would be incorrect).
2104   //      instrumented_1(float a)
2105   //             |
2106   //             v
2107   //      instrumented_2(float b)  (A)
2108   //             |
2109   //             v
2110   //      non_instrumented_3()
2111   //             |
2112   //             v
2113   //      instrumented_2(float b)  (B)
2114   //
2115   //  - A shadow return slot. Any function that returns a floating-point value
2116   //    places a shadow return value in __nsan_shadow_ret_val. Again, because
2117   //    we might be calling non-instrumented functions, this value is guarded
2118   //    by __nsan_shadow_ret_tag marker indicating which instrumented function
2119   //    placed the value in __nsan_shadow_ret_val, so that the caller can check
2120   //    that this corresponds to the callee. Both variables are thread local.
2121   //
2122   //    For example, in the following example, the instrumentation in
2123   //    `instrumented_1` rejects the shadow return value from `instrumented_3`
2124   //    because it is not tagged as expected (`&instrumented_3` instead of
2125   //    `non_instrumented_2`):
2126   //
2127   //        instrumented_1()
2128   //            |
2129   //            v
2130   //        float non_instrumented_2()
2131   //            |
2132   //            v
2133   //        float instrumented_3()
2134   //
2135   // Calls of known math functions (sin, cos, exp, ...) are duplicated to call
2136   // their overload on the shadow type.
2137 
2138   // Collect all instructions before processing, as creating shadow values
2139   // creates new instructions inside the function.
2140   std::vector<Instruction *> OriginalInstructions;
2141   for (BasicBlock &BB : F)
2142     for (Instruction &Inst : BB)
2143       OriginalInstructions.emplace_back(&Inst);
2144 
2145   moveFastMathFlags(F, OriginalInstructions);
2146   ValueToShadowMap ValueToShadow(Config);
2147 
2148   // In the first pass, we create shadow values for all FT function arguments
2149   // and all phis. This ensures that the DFS of the next pass does not have
2150   // any loops.
2151   std::vector<PHINode *> OriginalPhis;
2152   createShadowArguments(F, TLI, ValueToShadow);
2153   for (Instruction *I : OriginalInstructions) {
2154     if (PHINode *Phi = dyn_cast<PHINode>(I)) {
2155       if (PHINode *Shadow = maybeCreateShadowPhi(*Phi, TLI)) {
2156         OriginalPhis.push_back(Phi);
2157         ValueToShadow.setShadow(*Phi, *Shadow);
2158       }
2159     }
2160   }
2161 
2162   // Create shadow values for all instructions creating FT values.
2163   for (Instruction *I : OriginalInstructions)
2164     maybeCreateShadowValue(*I, TLI, ValueToShadow);
2165 
2166   // Propagate shadow values across stores, calls and rets.
2167   for (Instruction *I : OriginalInstructions)
2168     propagateShadowValues(*I, TLI, ValueToShadow);
2169 
2170   // The last pass populates shadow phis with shadow values.
2171   for (PHINode *Phi : OriginalPhis) {
2172     PHINode *ShadowPhi = cast<PHINode>(ValueToShadow.getShadow(Phi));
2173     for (unsigned I : seq(Phi->getNumOperands())) {
2174       Value *V = Phi->getOperand(I);
2175       Value *Shadow = ValueToShadow.getShadow(V);
2176       BasicBlock *IncomingBB = Phi->getIncomingBlock(I);
2177       // For some instructions (e.g. invoke), we create the shadow in a separate
2178       // block, different from the block where the original value is created.
2179       // In that case, the shadow phi might need to refer to this block instead
2180       // of the original block.
2181       // Note that this can only happen for instructions as constant shadows are
2182       // always created in the same block.
2183       ShadowPhi->addIncoming(Shadow, IncomingBB);
2184     }
2185   }
2186 
2187   return !ValueToShadow.empty();
2188 }
2189 
2190 static uint64_t GetMemOpSize(Value *V) {
2191   uint64_t OpSize = 0;
2192   if (Constant *C = dyn_cast<Constant>(V)) {
2193     auto *CInt = dyn_cast<ConstantInt>(C);
2194     if (CInt && CInt->getValue().getBitWidth() <= 64)
2195       OpSize = CInt->getValue().getZExtValue();
2196   }
2197 
2198   return OpSize;
2199 }
2200 
2201 // Instrument the memory intrinsics so that they properly modify the shadow
2202 // memory.
2203 bool NumericalStabilitySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
2204   IRBuilder<> Builder(MI);
2205   if (auto *M = dyn_cast<MemSetInst>(MI)) {
2206     FunctionCallee SetUnknownFn =
2207         NsanSetUnknownFns.getFunctionFor(GetMemOpSize(M->getArgOperand(2)));
2208     if (SetUnknownFn.getFunctionType()->getNumParams() == 1)
2209       Builder.CreateCall(SetUnknownFn, {/*Address=*/M->getArgOperand(0)});
2210     else
2211       Builder.CreateCall(SetUnknownFn,
2212                          {/*Address=*/M->getArgOperand(0),
2213                           /*Size=*/Builder.CreateIntCast(M->getArgOperand(2),
2214                                                          IntptrTy, false)});
2215 
2216   } else if (auto *M = dyn_cast<MemTransferInst>(MI)) {
2217     FunctionCallee CopyFn =
2218         NsanCopyFns.getFunctionFor(GetMemOpSize(M->getArgOperand(2)));
2219 
2220     if (CopyFn.getFunctionType()->getNumParams() == 2)
2221       Builder.CreateCall(CopyFn, {/*Destination=*/M->getArgOperand(0),
2222                                   /*Source=*/M->getArgOperand(1)});
2223     else
2224       Builder.CreateCall(CopyFn, {/*Destination=*/M->getArgOperand(0),
2225                                   /*Source=*/M->getArgOperand(1),
2226                                   /*Size=*/
2227                                   Builder.CreateIntCast(M->getArgOperand(2),
2228                                                         IntptrTy, false)});
2229   }
2230   return false;
2231 }
2232 
2233 void NumericalStabilitySanitizer::maybeAddSuffixForNsanInterface(CallBase *CI) {
2234   Function *Fn = CI->getCalledFunction();
2235   if (Fn == nullptr)
2236     return;
2237 
2238   if (!Fn->getName().starts_with("__nsan_"))
2239     return;
2240 
2241   if (Fn->getName() == "__nsan_dump_shadow_mem") {
2242     assert(CI->arg_size() == 4 &&
2243            "invalid prototype for __nsan_dump_shadow_mem");
2244     // __nsan_dump_shadow_mem requires an extra parameter with the dynamic
2245     // configuration:
2246     // (shadow_type_id_for_long_double << 16) | (shadow_type_id_for_double << 8)
2247     // | shadow_type_id_for_double
2248     const uint64_t shadow_value_type_ids =
2249         (static_cast<size_t>(Config.byValueType(kLongDouble).getNsanTypeId())
2250          << 16) |
2251         (static_cast<size_t>(Config.byValueType(kDouble).getNsanTypeId())
2252          << 8) |
2253         static_cast<size_t>(Config.byValueType(kFloat).getNsanTypeId());
2254     CI->setArgOperand(3, ConstantInt::get(IntptrTy, shadow_value_type_ids));
2255   }
2256 }
2257