xref: /llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (revision 4c7aa6f983685e93ebb8ad0cc41247bca0d6db9b)
1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of MemorySanitizer, a detector of uninitialized
11 /// reads.
12 ///
13 /// The algorithm of the tool is similar to Memcheck
14 /// (https://static.usenix.org/event/usenix05/tech/general/full_papers/seward/seward_html/usenix2005.html)
15 /// We associate a few shadow bits with every byte of the application memory,
16 /// poison the shadow of the malloc-ed or alloca-ed memory, load the shadow
17 /// bits on every memory read, propagate the shadow bits through some of the
18 /// arithmetic instructions (including MOV), store the shadow bits on every
19 /// memory write, report a bug on some other instructions (e.g. JMP) if the
20 /// associated shadow is poisoned.
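///
/// For example (a rough sketch of the propagation rules, not the exact code
/// the pass emits):
///   c = a + b    ==>  shadow(c) = shadow(a) | shadow(b)
///   if (c) ...   ==>  if (shadow(c) != 0) report a use of uninitialized value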
21 ///
22 /// But there are differences too. The first and the major one:
23 /// compiler instrumentation instead of binary instrumentation. This
24 /// gives us much better register allocation, possible compiler
25 /// optimizations and a fast start-up. But this brings a major issue as
26 /// well: MSan needs to see all program events, including system
27 /// calls and reads/writes in system libraries, so we either need to
28 /// compile *everything* with msan or use a binary translation
29 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
30 /// Another difference from Memcheck is that we use 8 shadow bits per
31 /// byte of application memory and use a direct shadow mapping. This
32 /// greatly simplifies the instrumentation code and avoids races on
33 /// shadow updates (Memcheck is single-threaded so races are not a
34 /// concern there. Memcheck uses 2 shadow bits per byte with a slow
35 /// path storage that uses 8 bits per byte).
36 ///
37 /// The default value of shadow is 0, which means "clean" (not poisoned).
38 ///
39 /// Every module initializer should call __msan_init to ensure that the
40 /// shadow memory is ready. On error, __msan_warning is called. Since
41 /// parameters and return values may be passed via registers, we have a
42 /// specialized thread-local shadow for return values
43 /// (__msan_retval_tls) and parameters (__msan_param_tls).
44 ///
45 ///                           Origin tracking.
46 ///
47 /// MemorySanitizer can track origins (allocation points) of all uninitialized
48 /// values. This behavior is controlled with a flag (msan-track-origins) and is
49 /// disabled by default.
50 ///
51 /// Origins are 4-byte values created and interpreted by the runtime library.
52 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53 /// of application memory. Propagation of origins is basically a bunch of
54 /// "select" instructions that pick the origin of a dirty argument, if an
55 /// instruction has one.
56 ///
57 /// Every aligned, consecutive 4 bytes of application memory have one origin
58 /// value associated with them. If these bytes contain uninitialized data
59 /// coming from 2 different allocations, the last store wins. Because of this,
60 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61 /// practice.
62 ///
63 /// Origins are meaningless for fully initialized values, so MemorySanitizer
64 /// avoids storing origin to memory when a fully initialized value is stored.
65 /// This way it avoids needlessly overwriting the origin of the 4-byte region
66 /// on a short (e.g. 1-byte) clean store, and it is also good for performance.
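///
/// For example (a rough sketch, not the exact generated code), for c = a + b
/// with origin tracking enabled:
///   shadow(c) = shadow(a) | shadow(b)
///   origin(c) = shadow(b) != 0 ? origin(b) : origin(a)  // pick a dirty argument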
67 ///
68 ///                            Atomic handling.
69 ///
70 /// Ideally, every atomic store of an application value should update the
71 /// corresponding shadow location in an atomic way. Unfortunately, an atomic
72 /// store to two disjoint locations cannot be done without severe slowdown.
73 ///
74 /// Therefore, we implement an approximation that may err on the safe side.
75 /// In this implementation, every atomically accessed location in the program
76 /// may only change from (partially) uninitialized to fully initialized, but
77 /// not the other way around. We load the shadow _after_ the application load,
78 /// and we store the shadow _before_ the app store. Also, we always store clean
79 /// shadow (if the application store is atomic). This way, if the store-load
80 /// pair constitutes a happens-before arc, shadow store and load are correctly
81 /// ordered such that the load will get either the value that was stored, or
82 /// some later value (which is always clean).
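///
/// A rough sketch of the resulting ordering (not the exact generated code):
///   // atomic store of V to P:         // atomic load of V from P:
///   store clean shadow to shadow(P);   V = atomic load from P;
///   atomic store V to P;               S = load from shadow(P);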
83 ///
84 /// This does not work very well with Compare-And-Swap (CAS) and
85 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86 /// must store the new shadow before the app operation, and load the shadow
87 /// after the app operation. Computers don't work this way. The current
88 /// implementation ignores the load aspect of CAS/RMW, always returning a clean
89 /// value. It implements the store part as a simple atomic store by storing a
90 /// clean shadow.
91 ///
92 ///                      Instrumenting inline assembly.
93 ///
94 /// For inline assembly code LLVM has little idea about which memory locations
95 /// become initialized depending on the arguments. It may be possible to figure
96 /// out which arguments are meant to point to inputs and outputs, but the
97 /// actual semantics may only be visible at runtime. In the Linux kernel it's
98 /// also possible that the arguments only indicate the offset for a base taken
99 /// from a segment register, so it's dangerous to treat any asm() arguments as
100 /// pointers. We take a conservative approach generating calls to
101 ///   __msan_instrument_asm_store(ptr, size)
102 /// which defers the memory unpoisoning to the runtime library.
103 /// The latter can perform more complex address checks to figure out whether
104 /// it's safe to touch the shadow memory.
105 /// Like with atomic operations, we call __msan_instrument_asm_store() before
106 /// the assembly call, so that changes to the shadow memory will be seen by
107 /// other threads together with main memory initialization.
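///
/// For example (a sketch, for a hypothetical asm statement with one pointer
/// output operand p of type T*), the instrumentation is roughly:
///   __msan_instrument_asm_store(p, sizeof(T));
///   asm volatile(... : "=m"(*p) ...);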
108 ///
109 ///                  KernelMemorySanitizer (KMSAN) implementation.
110 ///
111 /// The major differences between KMSAN and MSan instrumentation are:
112 ///  - KMSAN always tracks the origins and implies msan-keep-going=true;
113 ///  - KMSAN allocates shadow and origin memory for each page separately, so
114 ///    there are no explicit accesses to shadow and origin in the
115 ///    instrumentation.
116 ///    Shadow and origin values for a particular X-byte memory location
117 ///    (X=1,2,4,8) are accessed through pointers obtained via the
118 ///      __msan_metadata_ptr_for_load_X(ptr)
119 ///      __msan_metadata_ptr_for_store_X(ptr)
120 ///    functions, which check that the X-byte accesses are possible and
121 ///    return the pointers to shadow and origin memory (sketched below).
122 ///    Arbitrary sized accesses are handled with:
123 ///      __msan_metadata_ptr_for_load_n(ptr, size)
124 ///      __msan_metadata_ptr_for_store_n(ptr, size)
125 ///    Note that the sanitizer code has to deal with how shadow/origin pairs
126 ///    returned by these functions are represented in different ABIs. In
127 ///    the X86_64 ABI they are returned in RDX:RAX, in PowerPC64 they are
128 ///    returned in r3 and r4, and in the SystemZ ABI they are written to memory
129 ///    pointed to by a hidden parameter.
130 ///  - TLS variables are stored in a single per-task struct. A call to a
131 ///    function __msan_get_context_state() returning a pointer to that struct
132 ///    is inserted at the beginning of every instrumented function's entry block;
133 ///  - __msan_warning() takes a 32-bit origin parameter;
134 ///  - local variables are poisoned with __msan_poison_alloca() upon function
135 ///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the
136 ///    function;
137 ///  - the pass doesn't declare any global variables or add global constructors
138 ///    to the translation unit.
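///
/// For example (a rough sketch, not the exact generated code), a 4-byte KMSAN
/// store is instrumented along the lines of:
///   meta = __msan_metadata_ptr_for_store_4(addr);  // {shadow ptr, origin ptr}
///   store the value's shadow through meta.shadow (and, if the shadow is
///   non-zero, its origin through meta.origin), then perform the store itself.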
139 ///
140 /// Also, KMSAN currently ignores uninitialized memory passed into inline asm
141 /// calls, staying on the safe side with respect to possible false positives.
142 ///
143 ///  KernelMemorySanitizer only supports X86_64, SystemZ and PowerPC64 at the
144 ///  moment.
145 ///
146 //
147 // FIXME: This sanitizer does not yet handle scalable vectors
148 //
149 //===----------------------------------------------------------------------===//
150 
151 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
152 #include "llvm/ADT/APInt.h"
153 #include "llvm/ADT/ArrayRef.h"
154 #include "llvm/ADT/DenseMap.h"
155 #include "llvm/ADT/DepthFirstIterator.h"
156 #include "llvm/ADT/SetVector.h"
157 #include "llvm/ADT/SmallPtrSet.h"
158 #include "llvm/ADT/SmallVector.h"
159 #include "llvm/ADT/StringExtras.h"
160 #include "llvm/ADT/StringRef.h"
161 #include "llvm/Analysis/GlobalsModRef.h"
162 #include "llvm/Analysis/TargetLibraryInfo.h"
163 #include "llvm/Analysis/ValueTracking.h"
164 #include "llvm/IR/Argument.h"
165 #include "llvm/IR/AttributeMask.h"
166 #include "llvm/IR/Attributes.h"
167 #include "llvm/IR/BasicBlock.h"
168 #include "llvm/IR/CallingConv.h"
169 #include "llvm/IR/Constant.h"
170 #include "llvm/IR/Constants.h"
171 #include "llvm/IR/DataLayout.h"
172 #include "llvm/IR/DerivedTypes.h"
173 #include "llvm/IR/Function.h"
174 #include "llvm/IR/GlobalValue.h"
175 #include "llvm/IR/GlobalVariable.h"
176 #include "llvm/IR/IRBuilder.h"
177 #include "llvm/IR/InlineAsm.h"
178 #include "llvm/IR/InstVisitor.h"
179 #include "llvm/IR/InstrTypes.h"
180 #include "llvm/IR/Instruction.h"
181 #include "llvm/IR/Instructions.h"
182 #include "llvm/IR/IntrinsicInst.h"
183 #include "llvm/IR/Intrinsics.h"
184 #include "llvm/IR/IntrinsicsAArch64.h"
185 #include "llvm/IR/IntrinsicsX86.h"
186 #include "llvm/IR/MDBuilder.h"
187 #include "llvm/IR/Module.h"
188 #include "llvm/IR/Type.h"
189 #include "llvm/IR/Value.h"
190 #include "llvm/IR/ValueMap.h"
191 #include "llvm/Support/Alignment.h"
192 #include "llvm/Support/AtomicOrdering.h"
193 #include "llvm/Support/Casting.h"
194 #include "llvm/Support/CommandLine.h"
195 #include "llvm/Support/Debug.h"
196 #include "llvm/Support/DebugCounter.h"
197 #include "llvm/Support/ErrorHandling.h"
198 #include "llvm/Support/MathExtras.h"
199 #include "llvm/Support/raw_ostream.h"
200 #include "llvm/TargetParser/Triple.h"
201 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
202 #include "llvm/Transforms/Utils/Instrumentation.h"
203 #include "llvm/Transforms/Utils/Local.h"
204 #include "llvm/Transforms/Utils/ModuleUtils.h"
205 #include <algorithm>
206 #include <cassert>
207 #include <cstddef>
208 #include <cstdint>
209 #include <memory>
210 #include <string>
211 #include <tuple>
212 
213 using namespace llvm;
214 
215 #define DEBUG_TYPE "msan"
216 
217 DEBUG_COUNTER(DebugInsertCheck, "msan-insert-check",
218               "Controls which checks to insert");
219 
220 DEBUG_COUNTER(DebugInstrumentInstruction, "msan-instrument-instruction",
221               "Controls which instruction to instrument");
222 
223 static const unsigned kOriginSize = 4;
224 static const Align kMinOriginAlignment = Align(4);
225 static const Align kShadowTLSAlignment = Align(8);
226 
227 // These constants must be kept in sync with the ones in msan.h.
228 static const unsigned kParamTLSSize = 800;
229 static const unsigned kRetvalTLSSize = 800;
230 
231 // Access sizes are powers of two: 1, 2, 4, 8.
232 static const size_t kNumberOfAccessSizes = 4;
233 
234 /// Track origins of uninitialized values.
235 ///
236 /// Adds a section to MemorySanitizer report that points to the allocation
237 /// (stack or heap) the uninitialized bits came from originally.
238 static cl::opt<int> ClTrackOrigins(
239     "msan-track-origins",
240     cl::desc("Track origins (allocation sites) of poisoned memory"), cl::Hidden,
241     cl::init(0));
242 
243 static cl::opt<bool> ClKeepGoing("msan-keep-going",
244                                  cl::desc("keep going after reporting a UMR"),
245                                  cl::Hidden, cl::init(false));
246 
247 static cl::opt<bool>
248     ClPoisonStack("msan-poison-stack",
249                   cl::desc("poison uninitialized stack variables"), cl::Hidden,
250                   cl::init(true));
251 
252 static cl::opt<bool> ClPoisonStackWithCall(
253     "msan-poison-stack-with-call",
254     cl::desc("poison uninitialized stack variables with a call"), cl::Hidden,
255     cl::init(false));
256 
257 static cl::opt<int> ClPoisonStackPattern(
258     "msan-poison-stack-pattern",
259     cl::desc("poison uninitialized stack variables with the given pattern"),
260     cl::Hidden, cl::init(0xff));
261 
262 static cl::opt<bool>
263     ClPrintStackNames("msan-print-stack-names",
264                       cl::desc("Print name of local stack variable"),
265                       cl::Hidden, cl::init(true));
266 
267 static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
268                                    cl::desc("poison undef temps"), cl::Hidden,
269                                    cl::init(true));
270 
271 static cl::opt<bool>
272     ClHandleICmp("msan-handle-icmp",
273                  cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
274                  cl::Hidden, cl::init(true));
275 
276 static cl::opt<bool>
277     ClHandleICmpExact("msan-handle-icmp-exact",
278                       cl::desc("exact handling of relational integer ICmp"),
279                       cl::Hidden, cl::init(true));
280 
281 static cl::opt<bool> ClHandleLifetimeIntrinsics(
282     "msan-handle-lifetime-intrinsics",
283     cl::desc(
284         "when possible, poison scoped variables at the beginning of the scope "
285         "(slower, but more precise)"),
286     cl::Hidden, cl::init(true));
287 
288 // When compiling the Linux kernel, we sometimes see false positives related to
289 // MSan being unable to understand that inline assembly calls may initialize
290 // local variables.
291 // This flag makes the compiler conservatively unpoison every memory location
292 // passed into an assembly call. Note that this may cause false negatives.
293 // Because it's impossible to figure out the array sizes, we can only unpoison
294 // the first sizeof(type) bytes for each type* pointer.
295 static cl::opt<bool> ClHandleAsmConservative(
296     "msan-handle-asm-conservative",
297     cl::desc("conservative handling of inline assembly"), cl::Hidden,
298     cl::init(true));
299 
300 // This flag controls whether we check the shadow of the address
301 // operand of a load or store. Such bugs are very rare, since a load from
302 // a garbage address typically results in SEGV, but they still happen
303 // (e.g. only the lower bits of the address are garbage, or the access happens
304 // early at program startup when malloc-ed memory is more likely to
305 // be zeroed). As of 2012-08-28 this flag adds 20% slowdown.
306 static cl::opt<bool> ClCheckAccessAddress(
307     "msan-check-access-address",
308     cl::desc("report accesses through a pointer which has poisoned shadow"),
309     cl::Hidden, cl::init(true));
310 
311 static cl::opt<bool> ClEagerChecks(
312     "msan-eager-checks",
313     cl::desc("check arguments and return values at function call boundaries"),
314     cl::Hidden, cl::init(false));
315 
316 static cl::opt<bool> ClDumpStrictInstructions(
317     "msan-dump-strict-instructions",
318     cl::desc("print out instructions with default strict semantics"),
319     cl::Hidden, cl::init(false));
320 
321 static cl::opt<bool> ClDumpStrictIntrinsics(
322     "msan-dump-strict-intrinsics",
323     cl::desc("Prints 'unknown' intrinsics that were handled heuristically. "
324              "Use -msan-dump-strict-instructions to print intrinsics that "
325              "could not be handled exactly nor heuristically."),
326     cl::Hidden, cl::init(false));
327 
328 static cl::opt<int> ClInstrumentationWithCallThreshold(
329     "msan-instrumentation-with-call-threshold",
330     cl::desc(
331         "If the function being instrumented requires more than "
332         "this number of checks and origin stores, use callbacks instead of "
333         "inline checks (-1 means never use callbacks)."),
334     cl::Hidden, cl::init(3500));
335 
336 static cl::opt<bool>
337     ClEnableKmsan("msan-kernel",
338                   cl::desc("Enable KernelMemorySanitizer instrumentation"),
339                   cl::Hidden, cl::init(false));
340 
341 static cl::opt<bool>
342     ClDisableChecks("msan-disable-checks",
343                     cl::desc("Apply no_sanitize to the whole file"), cl::Hidden,
344                     cl::init(false));
345 
346 static cl::opt<bool>
347     ClCheckConstantShadow("msan-check-constant-shadow",
348                           cl::desc("Insert checks for constant shadow values"),
349                           cl::Hidden, cl::init(true));
350 
351 // This is off by default because of a bug in gold:
352 // https://sourceware.org/bugzilla/show_bug.cgi?id=19002
353 static cl::opt<bool>
354     ClWithComdat("msan-with-comdat",
355                  cl::desc("Place MSan constructors in comdat sections"),
356                  cl::Hidden, cl::init(false));
357 
358 // These options allow specifying custom memory map parameters.
359 // See MemoryMapParams for details.
360 static cl::opt<uint64_t> ClAndMask("msan-and-mask",
361                                    cl::desc("Define custom MSan AndMask"),
362                                    cl::Hidden, cl::init(0));
363 
364 static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
365                                    cl::desc("Define custom MSan XorMask"),
366                                    cl::Hidden, cl::init(0));
367 
368 static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
369                                       cl::desc("Define custom MSan ShadowBase"),
370                                       cl::Hidden, cl::init(0));
371 
372 static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
373                                       cl::desc("Define custom MSan OriginBase"),
374                                       cl::Hidden, cl::init(0));
375 
376 static cl::opt<int>
377     ClDisambiguateWarning("msan-disambiguate-warning-threshold",
378                           cl::desc("Define threshold for number of checks per "
379                                    "debug location to force origin update."),
380                           cl::Hidden, cl::init(3));
381 
382 const char kMsanModuleCtorName[] = "msan.module_ctor";
383 const char kMsanInitName[] = "__msan_init";
384 
385 namespace {
386 
387 // Memory map parameters used in application-to-shadow address calculation.
388 // Offset = (Addr & ~AndMask) ^ XorMask
389 // Shadow = ShadowBase + Offset
390 // Origin = OriginBase + Offset
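//
// For illustration, with the Linux/x86_64 parameters defined below
// (AndMask = 0, XorMask = 0x500000000000, ShadowBase = 0,
// OriginBase = 0x100000000000) this reduces to roughly:
//   Shadow(Addr) = Addr ^ 0x500000000000
//   Origin(Addr) = 0x100000000000 + (Addr ^ 0x500000000000)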
391 struct MemoryMapParams {
392   uint64_t AndMask;
393   uint64_t XorMask;
394   uint64_t ShadowBase;
395   uint64_t OriginBase;
396 };
397 
398 struct PlatformMemoryMapParams {
399   const MemoryMapParams *bits32;
400   const MemoryMapParams *bits64;
401 };
402 
403 } // end anonymous namespace
404 
405 // i386 Linux
406 static const MemoryMapParams Linux_I386_MemoryMapParams = {
407     0x000080000000, // AndMask
408     0,              // XorMask (not used)
409     0,              // ShadowBase (not used)
410     0x000040000000, // OriginBase
411 };
412 
413 // x86_64 Linux
414 static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
415     0,              // AndMask (not used)
416     0x500000000000, // XorMask
417     0,              // ShadowBase (not used)
418     0x100000000000, // OriginBase
419 };
420 
421 // mips32 Linux
422 // FIXME: Remove -msan-origin-base -msan-and-mask added by PR #109284 to tests
423 // after picking good constants
424 
425 // mips64 Linux
426 static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
427     0,              // AndMask (not used)
428     0x008000000000, // XorMask
429     0,              // ShadowBase (not used)
430     0x002000000000, // OriginBase
431 };
432 
433 // ppc32 Linux
434 // FIXME: Remove -msan-origin-base -msan-and-mask added by PR #109284 to tests
435 // after picking good constants
436 
437 // ppc64 Linux
438 static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
439     0xE00000000000, // AndMask
440     0x100000000000, // XorMask
441     0x080000000000, // ShadowBase
442     0x1C0000000000, // OriginBase
443 };
444 
445 // s390x Linux
446 static const MemoryMapParams Linux_S390X_MemoryMapParams = {
447     0xC00000000000, // AndMask
448     0,              // XorMask (not used)
449     0x080000000000, // ShadowBase
450     0x1C0000000000, // OriginBase
451 };
452 
453 // arm32 Linux
454 // FIXME: Remove -msan-origin-base -msan-and-mask added by PR #109284 to tests
455 // after picking good constants
456 
457 // aarch64 Linux
458 static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
459     0,               // AndMask (not used)
460     0x0B00000000000, // XorMask
461     0,               // ShadowBase (not used)
462     0x0200000000000, // OriginBase
463 };
464 
465 // loongarch64 Linux
466 static const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
467     0,              // AndMask (not used)
468     0x500000000000, // XorMask
469     0,              // ShadowBase (not used)
470     0x100000000000, // OriginBase
471 };
472 
473 // riscv32 Linux
474 // FIXME: Remove -msan-origin-base -msan-and-mask added by PR #109284 to tests
475 // after picking good constants
476 
477 // aarch64 FreeBSD
478 static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = {
479     0x1800000000000, // AndMask
480     0x0400000000000, // XorMask
481     0x0200000000000, // ShadowBase
482     0x0700000000000, // OriginBase
483 };
484 
485 // i386 FreeBSD
486 static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
487     0x000180000000, // AndMask
488     0x000040000000, // XorMask
489     0x000020000000, // ShadowBase
490     0x000700000000, // OriginBase
491 };
492 
493 // x86_64 FreeBSD
494 static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
495     0xc00000000000, // AndMask
496     0x200000000000, // XorMask
497     0x100000000000, // ShadowBase
498     0x380000000000, // OriginBase
499 };
500 
501 // x86_64 NetBSD
502 static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
503     0,              // AndMask
504     0x500000000000, // XorMask
505     0,              // ShadowBase
506     0x100000000000, // OriginBase
507 };
508 
509 static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
510     &Linux_I386_MemoryMapParams,
511     &Linux_X86_64_MemoryMapParams,
512 };
513 
514 static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
515     nullptr,
516     &Linux_MIPS64_MemoryMapParams,
517 };
518 
519 static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
520     nullptr,
521     &Linux_PowerPC64_MemoryMapParams,
522 };
523 
524 static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
525     nullptr,
526     &Linux_S390X_MemoryMapParams,
527 };
528 
529 static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
530     nullptr,
531     &Linux_AArch64_MemoryMapParams,
532 };
533 
534 static const PlatformMemoryMapParams Linux_LoongArch_MemoryMapParams = {
535     nullptr,
536     &Linux_LoongArch64_MemoryMapParams,
537 };
538 
539 static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = {
540     nullptr,
541     &FreeBSD_AArch64_MemoryMapParams,
542 };
543 
544 static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
545     &FreeBSD_I386_MemoryMapParams,
546     &FreeBSD_X86_64_MemoryMapParams,
547 };
548 
549 static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
550     nullptr,
551     &NetBSD_X86_64_MemoryMapParams,
552 };
553 
554 namespace {
555 
556 /// Instrument functions of a module to detect uninitialized reads.
557 ///
558 /// Instantiating MemorySanitizer inserts the msan runtime library API function
559 /// declarations into the module if they don't exist already. Instantiating
560 /// ensures the __msan_init function is in the list of global constructors for
561 /// the module.
562 class MemorySanitizer {
563 public:
564   MemorySanitizer(Module &M, MemorySanitizerOptions Options)
565       : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
566         Recover(Options.Recover), EagerChecks(Options.EagerChecks) {
567     initializeModule(M);
568   }
569 
570   // MSan cannot be moved or copied because of MapParams.
571   MemorySanitizer(MemorySanitizer &&) = delete;
572   MemorySanitizer &operator=(MemorySanitizer &&) = delete;
573   MemorySanitizer(const MemorySanitizer &) = delete;
574   MemorySanitizer &operator=(const MemorySanitizer &) = delete;
575 
576   bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
577 
578 private:
579   friend struct MemorySanitizerVisitor;
580   friend struct VarArgHelperBase;
581   friend struct VarArgAMD64Helper;
582   friend struct VarArgAArch64Helper;
583   friend struct VarArgPowerPCHelper;
584   friend struct VarArgSystemZHelper;
585   friend struct VarArgI386Helper;
586   friend struct VarArgGenericHelper;
587 
588   void initializeModule(Module &M);
589   void initializeCallbacks(Module &M, const TargetLibraryInfo &TLI);
590   void createKernelApi(Module &M, const TargetLibraryInfo &TLI);
591   void createUserspaceApi(Module &M, const TargetLibraryInfo &TLI);
592 
593   template <typename... ArgsTy>
594   FunctionCallee getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
595                                                  ArgsTy... Args);
596 
597   /// True if we're compiling the Linux kernel.
598   bool CompileKernel;
599   /// Track origins (allocation points) of uninitialized values.
600   int TrackOrigins;
601   bool Recover;
602   bool EagerChecks;
603 
604   Triple TargetTriple;
605   LLVMContext *C;
606   Type *IntptrTy; ///< Integer type with the size of a ptr in default AS.
607   Type *OriginTy;
608   PointerType *PtrTy; ///< Pointer type in the default address space.
609 
610   // XxxTLS variables represent the per-thread state in MSan and per-task state
611   // in KMSAN.
612   // In userspace these point to thread-local globals. In the kernel they
613   // point to the members of a per-task struct obtained via a call to
614   // __msan_get_context_state().
615 
616   /// Thread-local shadow storage for function parameters.
617   Value *ParamTLS;
618 
619   /// Thread-local origin storage for function parameters.
620   Value *ParamOriginTLS;
621 
622   /// Thread-local shadow storage for function return value.
623   Value *RetvalTLS;
624 
625   /// Thread-local origin storage for function return value.
626   Value *RetvalOriginTLS;
627 
628   /// Thread-local shadow storage for in-register va_arg function arguments.
629   Value *VAArgTLS;
630 
631   /// Thread-local origin storage for in-register va_arg function arguments.
632   Value *VAArgOriginTLS;
633 
634   /// Thread-local storage for the size of the va_arg overflow area.
635   Value *VAArgOverflowSizeTLS;
636 
637   /// Are the instrumentation callbacks set up?
638   bool CallbacksInitialized = false;
639 
640   /// The run-time callback to print a warning.
641   FunctionCallee WarningFn;
642 
643   // These arrays are indexed by log2(AccessSize).
644   FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
645   FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
646 
647   /// Run-time helper that generates a new origin value for a stack
648   /// allocation.
649   FunctionCallee MsanSetAllocaOriginWithDescriptionFn;
650   /// No-description version of the above.
651   FunctionCallee MsanSetAllocaOriginNoDescriptionFn;
652 
653   /// Run-time helper that poisons stack on function entry.
654   FunctionCallee MsanPoisonStackFn;
655 
656   /// Run-time helper that records a store (or any event) of an
657   /// uninitialized value and returns an updated origin id encoding this info.
658   FunctionCallee MsanChainOriginFn;
659 
660   /// Run-time helper that paints an origin over a region.
661   FunctionCallee MsanSetOriginFn;
662 
663   /// MSan runtime replacements for memmove, memcpy and memset.
664   FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
665 
666   /// KMSAN callback for task-local function argument shadow.
667   StructType *MsanContextStateTy;
668   FunctionCallee MsanGetContextStateFn;
669 
670   /// Functions for poisoning/unpoisoning local variables
671   FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
672 
673   /// Pair of shadow/origin pointers.
674   Type *MsanMetadata;
675 
676   /// Each of the MsanMetadataPtrXxx functions returns a MsanMetadata.
677   FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
678   FunctionCallee MsanMetadataPtrForLoad_1_8[4];
679   FunctionCallee MsanMetadataPtrForStore_1_8[4];
680   FunctionCallee MsanInstrumentAsmStoreFn;
681 
682   /// Storage for return values of the MsanMetadataPtrXxx functions.
683   Value *MsanMetadataAlloca;
684 
685   /// Helper to choose between different MsanMetadataPtrXxx().
686   FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
687 
688   /// Memory map parameters used in application-to-shadow calculation.
689   const MemoryMapParams *MapParams;
690 
691   /// Custom memory map parameters used when -msan-shadow-base or
692   /// -msan-origin-base is provided.
693   MemoryMapParams CustomMapParams;
694 
695   MDNode *ColdCallWeights;
696 
697   /// Branch weights for origin store.
698   MDNode *OriginStoreWeights;
699 };
700 
701 void insertModuleCtor(Module &M) {
702   getOrCreateSanitizerCtorAndInitFunctions(
703       M, kMsanModuleCtorName, kMsanInitName,
704       /*InitArgTypes=*/{},
705       /*InitArgs=*/{},
706       // This callback is invoked when the functions are created the first
707       // time. Hook them into the global ctors list in that case:
708       [&](Function *Ctor, FunctionCallee) {
709         if (!ClWithComdat) {
710           appendToGlobalCtors(M, Ctor, 0);
711           return;
712         }
713         Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
714         Ctor->setComdat(MsanCtorComdat);
715         appendToGlobalCtors(M, Ctor, 0, Ctor);
716       });
717 }
718 
719 template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
720   return (Opt.getNumOccurrences() > 0) ? Opt : Default;
721 }
722 
723 } // end anonymous namespace
724 
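// Note: explicitly passed cl::opt flags take precedence over the values
// supplied by the caller. For example (a sketch), constructing
// MemorySanitizerOptions(/*TO=*/1, ...) while -msan-track-origins=2 is given
// on the command line yields TrackOrigins == 2; enabling -msan-kernel also
// switches the defaults to origin tracking and keep-going (Recover) enabled.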
725 MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K,
726                                                bool EagerChecks)
727     : Kernel(getOptOrDefault(ClEnableKmsan, K)),
728       TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
729       Recover(getOptOrDefault(ClKeepGoing, Kernel || R)),
730       EagerChecks(getOptOrDefault(ClEagerChecks, EagerChecks)) {}
731 
732 PreservedAnalyses MemorySanitizerPass::run(Module &M,
733                                            ModuleAnalysisManager &AM) {
734   // Return early if the nosanitize_memory module flag is present.
735   if (checkIfAlreadyInstrumented(M, "nosanitize_memory"))
736     return PreservedAnalyses::all();
737   bool Modified = false;
738   if (!Options.Kernel) {
739     insertModuleCtor(M);
740     Modified = true;
741   }
742 
743   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
744   for (Function &F : M) {
745     if (F.empty())
746       continue;
747     MemorySanitizer Msan(*F.getParent(), Options);
748     Modified |=
749         Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F));
750   }
751 
752   if (!Modified)
753     return PreservedAnalyses::all();
754 
755   PreservedAnalyses PA = PreservedAnalyses::none();
756   // GlobalsAA is considered stateless and does not get invalidated unless
757   // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
758   // make changes that require GlobalsAA to be invalidated.
759   PA.abandon<GlobalsAA>();
760   return PA;
761 }
762 
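// For example (a sketch), a pass configured with Recover = true and
// TrackOrigins = 2 prints its pipeline element roughly as:
//   msan<recover;track-origins=2>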
763 void MemorySanitizerPass::printPipeline(
764     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
765   static_cast<PassInfoMixin<MemorySanitizerPass> *>(this)->printPipeline(
766       OS, MapClassName2PassName);
767   OS << '<';
768   if (Options.Recover)
769     OS << "recover;";
770   if (Options.Kernel)
771     OS << "kernel;";
772   if (Options.EagerChecks)
773     OS << "eager-checks;";
774   OS << "track-origins=" << Options.TrackOrigins;
775   OS << '>';
776 }
777 
778 /// Create a non-const global initialized with the given string.
779 ///
780 /// Creates a writable global for Str so that we can pass it to the
781 /// run-time lib. Runtime uses first 4 bytes of the string to store the
782 /// frame ID, so the string needs to be mutable.
783 static GlobalVariable *createPrivateConstGlobalForString(Module &M,
784                                                          StringRef Str) {
785   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
786   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/true,
787                             GlobalValue::PrivateLinkage, StrConst, "");
788 }
789 
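// The KMSAN __msan_metadata_ptr_* helpers return a {shadow, origin} pointer
// pair. On most targets the pair is returned directly; on SystemZ it is
// written to memory through a hidden pointer parameter prepended to the
// argument list. A rough sketch of the two declaration shapes (illustrative
// only, not the exact runtime prototypes):
//   MsanMetadata __msan_metadata_ptr_for_load_4(void *addr);              // default
//   void __msan_metadata_ptr_for_load_4(MsanMetadata *ret, void *addr);   // SystemZ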
790 template <typename... ArgsTy>
791 FunctionCallee
792 MemorySanitizer::getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
793                                                  ArgsTy... Args) {
794   if (TargetTriple.getArch() == Triple::systemz) {
795     // SystemZ ABI: shadow/origin pair is returned via a hidden parameter.
796     return M.getOrInsertFunction(Name, Type::getVoidTy(*C), PtrTy,
797                                  std::forward<ArgsTy>(Args)...);
798   }
799 
800   return M.getOrInsertFunction(Name, MsanMetadata,
801                                std::forward<ArgsTy>(Args)...);
802 }
803 
804 /// Create KMSAN API callbacks.
805 void MemorySanitizer::createKernelApi(Module &M, const TargetLibraryInfo &TLI) {
806   IRBuilder<> IRB(*C);
807 
808   // These will be initialized in insertKmsanPrologue().
809   RetvalTLS = nullptr;
810   RetvalOriginTLS = nullptr;
811   ParamTLS = nullptr;
812   ParamOriginTLS = nullptr;
813   VAArgTLS = nullptr;
814   VAArgOriginTLS = nullptr;
815   VAArgOverflowSizeTLS = nullptr;
816 
817   WarningFn = M.getOrInsertFunction("__msan_warning",
818                                     TLI.getAttrList(C, {0}, /*Signed=*/false),
819                                     IRB.getVoidTy(), IRB.getInt32Ty());
820 
821   // Requests the per-task context state (kmsan_context_state*) from the
822   // runtime library.
823   MsanContextStateTy = StructType::get(
824       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
825       ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
826       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
827       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
828       IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
829       OriginTy);
830   MsanGetContextStateFn =
831       M.getOrInsertFunction("__msan_get_context_state", PtrTy);
832 
833   MsanMetadata = StructType::get(PtrTy, PtrTy);
834 
835   for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
836     std::string name_load =
837         "__msan_metadata_ptr_for_load_" + std::to_string(size);
838     std::string name_store =
839         "__msan_metadata_ptr_for_store_" + std::to_string(size);
840     MsanMetadataPtrForLoad_1_8[ind] =
841         getOrInsertMsanMetadataFunction(M, name_load, PtrTy);
842     MsanMetadataPtrForStore_1_8[ind] =
843         getOrInsertMsanMetadataFunction(M, name_store, PtrTy);
844   }
845 
846   MsanMetadataPtrForLoadN = getOrInsertMsanMetadataFunction(
847       M, "__msan_metadata_ptr_for_load_n", PtrTy, IRB.getInt64Ty());
848   MsanMetadataPtrForStoreN = getOrInsertMsanMetadataFunction(
849       M, "__msan_metadata_ptr_for_store_n", PtrTy, IRB.getInt64Ty());
850 
851   // Functions for poisoning and unpoisoning memory.
852   MsanPoisonAllocaFn = M.getOrInsertFunction(
853       "__msan_poison_alloca", IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy);
854   MsanUnpoisonAllocaFn = M.getOrInsertFunction(
855       "__msan_unpoison_alloca", IRB.getVoidTy(), PtrTy, IntptrTy);
856 }
857 
858 static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
859   return M.getOrInsertGlobal(Name, Ty, [&] {
860     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
861                               nullptr, Name, nullptr,
862                               GlobalVariable::InitialExecTLSModel);
863   });
864 }
865 
866 /// Insert declarations for userspace-specific functions and globals.
867 void MemorySanitizer::createUserspaceApi(Module &M,
868                                          const TargetLibraryInfo &TLI) {
869   IRBuilder<> IRB(*C);
870 
871   // Create the callback.
872   // FIXME: this function should have "Cold" calling conv,
873   // which is not yet implemented.
874   if (TrackOrigins) {
875     StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
876                                       : "__msan_warning_with_origin_noreturn";
877     WarningFn = M.getOrInsertFunction(WarningFnName,
878                                       TLI.getAttrList(C, {0}, /*Signed=*/false),
879                                       IRB.getVoidTy(), IRB.getInt32Ty());
880   } else {
881     StringRef WarningFnName =
882         Recover ? "__msan_warning" : "__msan_warning_noreturn";
883     WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy());
884   }
885 
886   // Create the global TLS variables.
887   RetvalTLS =
888       getOrInsertGlobal(M, "__msan_retval_tls",
889                         ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
890 
891   RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
892 
893   ParamTLS =
894       getOrInsertGlobal(M, "__msan_param_tls",
895                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
896 
897   ParamOriginTLS =
898       getOrInsertGlobal(M, "__msan_param_origin_tls",
899                         ArrayType::get(OriginTy, kParamTLSSize / 4));
900 
901   VAArgTLS =
902       getOrInsertGlobal(M, "__msan_va_arg_tls",
903                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
904 
905   VAArgOriginTLS =
906       getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
907                         ArrayType::get(OriginTy, kParamTLSSize / 4));
908 
909   VAArgOverflowSizeTLS =
910       getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
911 
912   for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
913        AccessSizeIndex++) {
914     unsigned AccessSize = 1 << AccessSizeIndex;
915     std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
916     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
917         FunctionName, TLI.getAttrList(C, {0, 1}, /*Signed=*/false),
918         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
919 
920     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
921     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
922         FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false),
923         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), PtrTy,
924         IRB.getInt32Ty());
925   }
926 
927   MsanSetAllocaOriginWithDescriptionFn =
928       M.getOrInsertFunction("__msan_set_alloca_origin_with_descr",
929                             IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy, PtrTy);
930   MsanSetAllocaOriginNoDescriptionFn =
931       M.getOrInsertFunction("__msan_set_alloca_origin_no_descr",
932                             IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy);
933   MsanPoisonStackFn = M.getOrInsertFunction("__msan_poison_stack",
934                                             IRB.getVoidTy(), PtrTy, IntptrTy);
935 }
936 
937 /// Insert extern declaration of runtime-provided functions and globals.
938 void MemorySanitizer::initializeCallbacks(Module &M,
939                                           const TargetLibraryInfo &TLI) {
940   // Only do this once.
941   if (CallbacksInitialized)
942     return;
943 
944   IRBuilder<> IRB(*C);
945   // Initialize callbacks that are common for kernel and userspace
946   // instrumentation.
947   MsanChainOriginFn = M.getOrInsertFunction(
948       "__msan_chain_origin",
949       TLI.getAttrList(C, {0}, /*Signed=*/false, /*Ret=*/true), IRB.getInt32Ty(),
950       IRB.getInt32Ty());
951   MsanSetOriginFn = M.getOrInsertFunction(
952       "__msan_set_origin", TLI.getAttrList(C, {2}, /*Signed=*/false),
953       IRB.getVoidTy(), PtrTy, IntptrTy, IRB.getInt32Ty());
954   MemmoveFn =
955       M.getOrInsertFunction("__msan_memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
956   MemcpyFn =
957       M.getOrInsertFunction("__msan_memcpy", PtrTy, PtrTy, PtrTy, IntptrTy);
958   MemsetFn = M.getOrInsertFunction("__msan_memset",
959                                    TLI.getAttrList(C, {1}, /*Signed=*/true),
960                                    PtrTy, PtrTy, IRB.getInt32Ty(), IntptrTy);
961 
962   MsanInstrumentAsmStoreFn = M.getOrInsertFunction(
963       "__msan_instrument_asm_store", IRB.getVoidTy(), PtrTy, IntptrTy);
964 
965   if (CompileKernel) {
966     createKernelApi(M, TLI);
967   } else {
968     createUserspaceApi(M, TLI);
969   }
970   CallbacksInitialized = true;
971 }
972 
973 FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
974                                                              int size) {
975   FunctionCallee *Fns =
976       isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
977   switch (size) {
978   case 1:
979     return Fns[0];
980   case 2:
981     return Fns[1];
982   case 4:
983     return Fns[2];
984   case 8:
985     return Fns[3];
986   default:
987     return nullptr;
988   }
989 }
990 
991 /// Module-level initialization.
992 ///
993 /// Inserts a call to __msan_init into the module's constructor list.
994 void MemorySanitizer::initializeModule(Module &M) {
995   auto &DL = M.getDataLayout();
996 
997   TargetTriple = Triple(M.getTargetTriple());
998 
999   bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
1000   bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
1001   // Check the overrides first
1002   if (ShadowPassed || OriginPassed) {
1003     CustomMapParams.AndMask = ClAndMask;
1004     CustomMapParams.XorMask = ClXorMask;
1005     CustomMapParams.ShadowBase = ClShadowBase;
1006     CustomMapParams.OriginBase = ClOriginBase;
1007     MapParams = &CustomMapParams;
1008   } else {
1009     switch (TargetTriple.getOS()) {
1010     case Triple::FreeBSD:
1011       switch (TargetTriple.getArch()) {
1012       case Triple::aarch64:
1013         MapParams = FreeBSD_ARM_MemoryMapParams.bits64;
1014         break;
1015       case Triple::x86_64:
1016         MapParams = FreeBSD_X86_MemoryMapParams.bits64;
1017         break;
1018       case Triple::x86:
1019         MapParams = FreeBSD_X86_MemoryMapParams.bits32;
1020         break;
1021       default:
1022         report_fatal_error("unsupported architecture");
1023       }
1024       break;
1025     case Triple::NetBSD:
1026       switch (TargetTriple.getArch()) {
1027       case Triple::x86_64:
1028         MapParams = NetBSD_X86_MemoryMapParams.bits64;
1029         break;
1030       default:
1031         report_fatal_error("unsupported architecture");
1032       }
1033       break;
1034     case Triple::Linux:
1035       switch (TargetTriple.getArch()) {
1036       case Triple::x86_64:
1037         MapParams = Linux_X86_MemoryMapParams.bits64;
1038         break;
1039       case Triple::x86:
1040         MapParams = Linux_X86_MemoryMapParams.bits32;
1041         break;
1042       case Triple::mips64:
1043       case Triple::mips64el:
1044         MapParams = Linux_MIPS_MemoryMapParams.bits64;
1045         break;
1046       case Triple::ppc64:
1047       case Triple::ppc64le:
1048         MapParams = Linux_PowerPC_MemoryMapParams.bits64;
1049         break;
1050       case Triple::systemz:
1051         MapParams = Linux_S390_MemoryMapParams.bits64;
1052         break;
1053       case Triple::aarch64:
1054       case Triple::aarch64_be:
1055         MapParams = Linux_ARM_MemoryMapParams.bits64;
1056         break;
1057       case Triple::loongarch64:
1058         MapParams = Linux_LoongArch_MemoryMapParams.bits64;
1059         break;
1060       default:
1061         report_fatal_error("unsupported architecture");
1062       }
1063       break;
1064     default:
1065       report_fatal_error("unsupported operating system");
1066     }
1067   }
1068 
1069   C = &(M.getContext());
1070   IRBuilder<> IRB(*C);
1071   IntptrTy = IRB.getIntPtrTy(DL);
1072   OriginTy = IRB.getInt32Ty();
1073   PtrTy = IRB.getPtrTy();
1074 
1075   ColdCallWeights = MDBuilder(*C).createUnlikelyBranchWeights();
1076   OriginStoreWeights = MDBuilder(*C).createUnlikelyBranchWeights();
1077 
1078   if (!CompileKernel) {
1079     if (TrackOrigins)
1080       M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
1081         return new GlobalVariable(
1082             M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
1083             IRB.getInt32(TrackOrigins), "__msan_track_origins");
1084       });
1085 
1086     if (Recover)
1087       M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
1088         return new GlobalVariable(M, IRB.getInt32Ty(), true,
1089                                   GlobalValue::WeakODRLinkage,
1090                                   IRB.getInt32(Recover), "__msan_keep_going");
1091       });
1092   }
1093 }
1094 
1095 namespace {
1096 
1097 /// A helper class that handles instrumentation of VarArg
1098 /// functions on a particular platform.
1099 ///
1100 /// Implementations are expected to insert the instrumentation
1101 /// necessary to propagate argument shadow through VarArg function
1102 /// calls. Visit* methods are called during an InstVisitor pass over
1103 /// the function, and should avoid creating new basic blocks. A new
1104 /// instance of this class is created for each instrumented function.
1105 struct VarArgHelper {
1106   virtual ~VarArgHelper() = default;
1107 
1108   /// Visit a CallBase.
1109   virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;
1110 
1111   /// Visit a va_start call.
1112   virtual void visitVAStartInst(VAStartInst &I) = 0;
1113 
1114   /// Visit a va_copy call.
1115   virtual void visitVACopyInst(VACopyInst &I) = 0;
1116 
1117   /// Finalize function instrumentation.
1118   ///
1119   /// This method is called after visiting all interesting (see above)
1120   /// instructions in a function.
1121   virtual void finalizeInstrumentation() = 0;
1122 };
1123 
1124 struct MemorySanitizerVisitor;
1125 
1126 } // end anonymous namespace
1127 
1128 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1129                                         MemorySanitizerVisitor &Visitor);
1130 
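// Maps an access size in bits to an index into the size-indexed callback
// arrays (e.g. MaybeWarningFn / MaybeStoreOriginFn). For example:
//   8 bits -> 0, 16 bits -> 1, 32 bits -> 2, 64 bits -> 3.
// Scalable sizes map to kNumberOfAccessSizes, i.e. "no matching callback".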
1131 static unsigned TypeSizeToSizeIndex(TypeSize TS) {
1132   if (TS.isScalable())
1133     // Scalable types unconditionally take slowpaths.
1134     return kNumberOfAccessSizes;
1135   unsigned TypeSizeFixed = TS.getFixedValue();
1136   if (TypeSizeFixed <= 8)
1137     return 0;
1138   return Log2_32_Ceil((TypeSizeFixed + 7) / 8);
1139 }
1140 
1141 namespace {
1142 
1143 /// Helper class to attach the debug information of the given instruction to
1144 /// new instructions inserted after it.
1145 class NextNodeIRBuilder : public IRBuilder<> {
1146 public:
1147   explicit NextNodeIRBuilder(Instruction *IP) : IRBuilder<>(IP->getNextNode()) {
1148     SetCurrentDebugLocation(IP->getDebugLoc());
1149   }
1150 };
1151 
1152 /// This class does all the work for a given function. Store and Load
1153 /// instructions store and load corresponding shadow and origin
1154 /// values. Most instructions propagate shadow from arguments to their
1155 /// return values. Certain instructions (most importantly, BranchInst)
1156 /// test their argument shadow and print reports (with a runtime call) if it's
1157 /// non-zero.
1158 struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
1159   Function &F;
1160   MemorySanitizer &MS;
1161   SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
1162   ValueMap<Value *, Value *> ShadowMap, OriginMap;
1163   std::unique_ptr<VarArgHelper> VAHelper;
1164   const TargetLibraryInfo *TLI;
1165   Instruction *FnPrologueEnd;
1166   SmallVector<Instruction *, 16> Instructions;
1167 
1168   // The following flags disable parts of MSan instrumentation based on
1169   // exclusion list contents and command-line options.
1170   bool InsertChecks;
1171   bool PropagateShadow;
1172   bool PoisonStack;
1173   bool PoisonUndef;
1174 
1175   struct ShadowOriginAndInsertPoint {
1176     Value *Shadow;
1177     Value *Origin;
1178     Instruction *OrigIns;
1179 
1180     ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
1181         : Shadow(S), Origin(O), OrigIns(I) {}
1182   };
1183   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
1184   DenseMap<const DILocation *, int> LazyWarningDebugLocationCount;
1185   bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
1186   SmallSetVector<AllocaInst *, 16> AllocaSet;
1187   SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
1188   SmallVector<StoreInst *, 16> StoreList;
1189   int64_t SplittableBlocksCount = 0;
1190 
1191   MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
1192                          const TargetLibraryInfo &TLI)
1193       : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
1194     bool SanitizeFunction =
1195         F.hasFnAttribute(Attribute::SanitizeMemory) && !ClDisableChecks;
1196     InsertChecks = SanitizeFunction;
1197     PropagateShadow = SanitizeFunction;
1198     PoisonStack = SanitizeFunction && ClPoisonStack;
1199     PoisonUndef = SanitizeFunction && ClPoisonUndef;
1200 
1201     // In the presence of unreachable blocks, we may see Phi nodes with
1202     // incoming values from such blocks. Since InstVisitor skips unreachable
1203     // blocks, such nodes will not have any shadow value associated with them.
1204     // It's easier to remove unreachable blocks than deal with missing shadow.
1205     removeUnreachableBlocks(F);
1206 
1207     MS.initializeCallbacks(*F.getParent(), TLI);
1208     FnPrologueEnd =
1209         IRBuilder<>(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHIIt())
1210             .CreateIntrinsic(Intrinsic::donothing, {}, {});
1211 
1212     if (MS.CompileKernel) {
1213       IRBuilder<> IRB(FnPrologueEnd);
1214       insertKmsanPrologue(IRB);
1215     }
1216 
1217     LLVM_DEBUG(if (!InsertChecks) dbgs()
1218                << "MemorySanitizer is not inserting checks into '"
1219                << F.getName() << "'\n");
1220   }
1221 
1222   bool instrumentWithCalls(Value *V) {
1223     // Constants likely will be eliminated by follow-up passes.
1224     if (isa<Constant>(V))
1225       return false;
1226 
1227     ++SplittableBlocksCount;
1228     return ClInstrumentationWithCallThreshold >= 0 &&
1229            SplittableBlocksCount > ClInstrumentationWithCallThreshold;
1230   }
1231 
1232   bool isInPrologue(Instruction &I) {
1233     return I.getParent() == FnPrologueEnd->getParent() &&
1234            (&I == FnPrologueEnd || I.comesBefore(FnPrologueEnd));
1235   }
1236 
1237   // Creates a new origin and records the stack trace. In general we can call
1238   // this function for any origin manipulation we like. However, it costs
1239   // runtime resources, so use it wisely, only where it can provide
1240   // additional information helpful to a user.
1241   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1242     if (MS.TrackOrigins <= 1)
1243       return V;
1244     return IRB.CreateCall(MS.MsanChainOriginFn, V);
1245   }
1246 
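  // Widens a 32-bit origin value to pointer width by replicating it, so that a
  // single intptr-sized store can paint two adjacent 4-byte origin slots.
  // E.g. on a 64-bit target (a sketch): 0x0000ABCD -> 0x0000ABCD0000ABCD.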
1247   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
1248     const DataLayout &DL = F.getDataLayout();
1249     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1250     if (IntptrSize == kOriginSize)
1251       return Origin;
1252     assert(IntptrSize == kOriginSize * 2);
1253     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
1254     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
1255   }
1256 
1257   /// Fill memory range with the given origin value.
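  ///
  /// For example (a sketch): an 8-byte store on a 64-bit target with a
  /// sufficiently aligned origin pointer is painted with a single 8-byte store
  /// of the origin widened via originToIntptr(); otherwise the range is
  /// covered with one 4-byte origin store per kOriginSize chunk.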
1258   void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
1259                    TypeSize TS, Align Alignment) {
1260     const DataLayout &DL = F.getDataLayout();
1261     const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
1262     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1263     assert(IntptrAlignment >= kMinOriginAlignment);
1264     assert(IntptrSize >= kOriginSize);
1265 
1266     // Note: The loop-based lowering works for fixed-length vectors too;
1267     // however, we prefer to unroll and specialize the alignment below.
1268     if (TS.isScalable()) {
1269       Value *Size = IRB.CreateTypeSize(MS.IntptrTy, TS);
1270       Value *RoundUp =
1271           IRB.CreateAdd(Size, ConstantInt::get(MS.IntptrTy, kOriginSize - 1));
1272       Value *End =
1273           IRB.CreateUDiv(RoundUp, ConstantInt::get(MS.IntptrTy, kOriginSize));
1274       auto [InsertPt, Index] =
1275           SplitBlockAndInsertSimpleForLoop(End, IRB.GetInsertPoint());
1276       IRB.SetInsertPoint(InsertPt);
1277 
1278       Value *GEP = IRB.CreateGEP(MS.OriginTy, OriginPtr, Index);
1279       IRB.CreateAlignedStore(Origin, GEP, kMinOriginAlignment);
1280       return;
1281     }
1282 
1283     unsigned Size = TS.getFixedValue();
1284 
1285     unsigned Ofs = 0;
1286     Align CurrentAlignment = Alignment;
1287     if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
1288       Value *IntptrOrigin = originToIntptr(IRB, Origin);
1289       Value *IntptrOriginPtr = IRB.CreatePointerCast(OriginPtr, MS.PtrTy);
1290       for (unsigned i = 0; i < Size / IntptrSize; ++i) {
1291         Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
1292                        : IntptrOriginPtr;
1293         IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
1294         Ofs += IntptrSize / kOriginSize;
1295         CurrentAlignment = IntptrAlignment;
1296       }
1297     }
1298 
1299     for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
1300       Value *GEP =
1301           i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
1302       IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
1303       CurrentAlignment = kMinOriginAlignment;
1304     }
1305   }
1306 
1307   void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
1308                    Value *OriginPtr, Align Alignment) {
1309     const DataLayout &DL = F.getDataLayout();
1310     const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1311     TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
1312     // ZExt cannot convert between vector and scalar
1313     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
1314     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1315       if (!ClCheckConstantShadow || ConstantShadow->isZeroValue()) {
1316         // Origin is not needed: value is initialized or const shadow is
1317         // ignored.
1318         return;
1319       }
1320       if (llvm::isKnownNonZero(ConvertedShadow, DL)) {
1321         // Copy origin as the value is definitely uninitialized.
1322         paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
1323                     OriginAlignment);
1324         return;
1325       }
1326       // Fall back to a runtime check, which can still be optimized out later.
1327     }
1328 
1329     TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1330     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1331     if (instrumentWithCalls(ConvertedShadow) &&
1332         SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1333       FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
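      // The callback expects the shadow as an integer of 1 << SizeIndex bytes,
      // so zero-extend the collapsed shadow to that width.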
1334       Value *ConvertedShadow2 =
1335           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1336       CallBase *CB = IRB.CreateCall(Fn, {ConvertedShadow2, Addr, Origin});
1337       CB->addParamAttr(0, Attribute::ZExt);
1338       CB->addParamAttr(2, Attribute::ZExt);
1339     } else {
1340       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1341       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1342           Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
1343       IRBuilder<> IRBNew(CheckTerm);
1344       paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
1345                   OriginAlignment);
1346     }
1347   }
1348 
1349   void materializeStores() {
1350     for (StoreInst *SI : StoreList) {
1351       IRBuilder<> IRB(SI);
1352       Value *Val = SI->getValueOperand();
1353       Value *Addr = SI->getPointerOperand();
1354       Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
1355       Value *ShadowPtr, *OriginPtr;
1356       Type *ShadowTy = Shadow->getType();
1357       const Align Alignment = SI->getAlign();
1358       const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1359       std::tie(ShadowPtr, OriginPtr) =
1360           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
1361 
1362       StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
1363       LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
1364       (void)NewSI;
1365 
1366       if (SI->isAtomic())
1367         SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
1368 
1369       if (MS.TrackOrigins && !SI->isAtomic())
1370         storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
1371                     OriginAlignment);
1372     }
1373   }
1374 
1375   // Returns true if Debug Location corresponds to multiple warnings.
1376   bool shouldDisambiguateWarningLocation(const DebugLoc &DebugLoc) {
1377     if (MS.TrackOrigins < 2)
1378       return false;
1379 
1380     if (LazyWarningDebugLocationCount.empty())
1381       for (const auto &I : InstrumentationList)
1382         ++LazyWarningDebugLocationCount[I.OrigIns->getDebugLoc()];
1383 
1384     return LazyWarningDebugLocationCount[DebugLoc] >= ClDisambiguateWarning;
1385   }
1386 
1387   /// Helper function to insert a warning at IRB's current insert point.
1388   void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
1389     if (!Origin)
1390       Origin = (Value *)IRB.getInt32(0);
1391     assert(Origin->getType()->isIntegerTy());
1392 
1393     if (shouldDisambiguateWarningLocation(IRB.getCurrentDebugLocation())) {
1394       // Try to create an additional origin using the debug info of the last
1395       // origin instruction. It may provide extra information to the user.
1396       if (Instruction *OI = dyn_cast_or_null<Instruction>(Origin)) {
1397         assert(MS.TrackOrigins);
1398         auto NewDebugLoc = OI->getDebugLoc();
1399         // Origin update with missing or the same debug location provides no
1400         // additional value.
1401         if (NewDebugLoc && NewDebugLoc != IRB.getCurrentDebugLocation()) {
1402           // Insert the update just before the check, so that the runtime is
1403           // called only right before the report.
1404           IRBuilder<> IRBOrigin(&*IRB.GetInsertPoint());
1405           IRBOrigin.SetCurrentDebugLocation(NewDebugLoc);
1406           Origin = updateOrigin(Origin, IRBOrigin);
1407         }
1408       }
1409     }
1410 
1411     if (MS.CompileKernel || MS.TrackOrigins)
1412       IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge();
1413     else
1414       IRB.CreateCall(MS.WarningFn)->setCannotMerge();
1415     // FIXME: Insert UnreachableInst if !MS.Recover?
1416     // This may invalidate some of the following checks and needs to be done
1417     // at the very end.
1418   }
1419 
1420   void materializeOneCheck(IRBuilder<> &IRB, Value *ConvertedShadow,
1421                            Value *Origin) {
1422     const DataLayout &DL = F.getDataLayout();
1423     TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1424     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1425     if (instrumentWithCalls(ConvertedShadow) &&
1426         SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1427       FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
1428       // ZExt cannot convert between vector and scalar
1429       ConvertedShadow = convertShadowToScalar(ConvertedShadow, IRB);
1430       Value *ConvertedShadow2 =
1431           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1432       CallBase *CB = IRB.CreateCall(
1433           Fn, {ConvertedShadow2,
1434                MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
1435       CB->addParamAttr(0, Attribute::ZExt);
1436       CB->addParamAttr(1, Attribute::ZExt);
1437     } else {
1438       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1439       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1440           Cmp, &*IRB.GetInsertPoint(),
1441           /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
1442 
1443       IRB.SetInsertPoint(CheckTerm);
1444       insertWarningFn(IRB, Origin);
1445       LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
1446     }
1447   }
1448 
1449   void materializeInstructionChecks(
1450       ArrayRef<ShadowOriginAndInsertPoint> InstructionChecks) {
1451     const DataLayout &DL = F.getDataLayout();
1452     // Disable combining in some cases. With TrackOrigins, each shadow is checked
1453     // separately so that the correct origin can be picked.
1454     bool Combine = !MS.TrackOrigins;
1455     Instruction *Instruction = InstructionChecks.front().OrigIns;
1456     Value *Shadow = nullptr;
1457     for (const auto &ShadowData : InstructionChecks) {
1458       assert(ShadowData.OrigIns == Instruction);
1459       IRBuilder<> IRB(Instruction);
1460 
1461       Value *ConvertedShadow = ShadowData.Shadow;
1462 
1463       if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1464         if (!ClCheckConstantShadow || ConstantShadow->isZeroValue()) {
1465           // Skip, value is initialized or const shadow is ignored.
1466           continue;
1467         }
1468         if (llvm::isKnownNonZero(ConvertedShadow, DL)) {
1469           // Report as the value is definitely uninitialized.
1470           insertWarningFn(IRB, ShadowData.Origin);
1471           if (!MS.Recover)
1472             return; // Always fail and stop here, no need to check the rest.
1473           // Otherwise, skip this check and continue with the remaining ones.
1474           continue;
1475         }
1476         // Fall back to a runtime check, which can still be optimized out later.
1477       }
1478 
1479       if (!Combine) {
1480         materializeOneCheck(IRB, ConvertedShadow, ShadowData.Origin);
1481         continue;
1482       }
1483 
1484       if (!Shadow) {
1485         Shadow = ConvertedShadow;
1486         continue;
1487       }
1488 
1489       Shadow = convertToBool(Shadow, IRB, "_mscmp");
1490       ConvertedShadow = convertToBool(ConvertedShadow, IRB, "_mscmp");
1491       Shadow = IRB.CreateOr(Shadow, ConvertedShadow, "_msor");
1492     }
1493 
1494     if (Shadow) {
1495       assert(Combine);
1496       IRBuilder<> IRB(Instruction);
1497       materializeOneCheck(IRB, Shadow, nullptr);
1498     }
1499   }
1500 
1501   void materializeChecks() {
1502 #ifndef NDEBUG
1503     // For assert below.
1504     SmallPtrSet<Instruction *, 16> Done;
1505 #endif
1506 
1507     for (auto I = InstrumentationList.begin();
1508          I != InstrumentationList.end();) {
1509       auto OrigIns = I->OrigIns;
1510       // Checks are grouped by the original instruction: all checks registered
1511       // via `insertShadowCheck` for an instruction are materialized at once.
1512       assert(Done.insert(OrigIns).second);
1513       auto J = std::find_if(I + 1, InstrumentationList.end(),
1514                             [OrigIns](const ShadowOriginAndInsertPoint &R) {
1515                               return OrigIns != R.OrigIns;
1516                             });
1517       // Process all checks for this instruction at once.
1518       materializeInstructionChecks(ArrayRef<ShadowOriginAndInsertPoint>(I, J));
1519       I = J;
1520     }
1521 
1522     LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1523   }
1524 
1525   // Sets up the KMSAN prologue: fetches the context state and its TLS-like slots.
1526   void insertKmsanPrologue(IRBuilder<> &IRB) {
1527     Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
1528     Constant *Zero = IRB.getInt32(0);
1529     MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1530                                 {Zero, IRB.getInt32(0)}, "param_shadow");
1531     MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1532                                  {Zero, IRB.getInt32(1)}, "retval_shadow");
1533     MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1534                                 {Zero, IRB.getInt32(2)}, "va_arg_shadow");
1535     MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1536                                       {Zero, IRB.getInt32(3)}, "va_arg_origin");
1537     MS.VAArgOverflowSizeTLS =
1538         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1539                       {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
1540     MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1541                                       {Zero, IRB.getInt32(5)}, "param_origin");
1542     MS.RetvalOriginTLS =
1543         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1544                       {Zero, IRB.getInt32(6)}, "retval_origin");
1545     if (MS.TargetTriple.getArch() == Triple::systemz)
1546       MS.MsanMetadataAlloca = IRB.CreateAlloca(MS.MsanMetadata, 0u);
1547   }
1548 
1549   /// Add MemorySanitizer instrumentation to a function.
1550   bool runOnFunction() {
1551     // Iterate all BBs in depth-first order and create shadow instructions
1552     // for all instructions (where applicable).
1553     // For PHI nodes we create dummy shadow PHIs which will be finalized later.
1554     for (BasicBlock *BB : depth_first(FnPrologueEnd->getParent()))
1555       visit(*BB);
1556 
1557     // `visit` above only collects instructions. Process them only after the CFG
1558     // walk is complete, so instrumentation is free to transform the CFG.
1559     for (Instruction *I : Instructions)
1560       InstVisitor<MemorySanitizerVisitor>::visit(*I);
1561 
1562     // Finalize PHI nodes.
1563     for (PHINode *PN : ShadowPHINodes) {
1564       PHINode *PNS = cast<PHINode>(getShadow(PN));
1565       PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
1566       size_t NumValues = PN->getNumIncomingValues();
1567       for (size_t v = 0; v < NumValues; v++) {
1568         PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
1569         if (PNO)
1570           PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
1571       }
1572     }
1573 
1574     VAHelper->finalizeInstrumentation();
1575 
1576     // Poison allocas at their llvm.lifetime.start, unless we have fallen back
1577     // to instrumenting only the allocas themselves.
1578     if (InstrumentLifetimeStart) {
1579       for (auto Item : LifetimeStartList) {
1580         instrumentAlloca(*Item.second, Item.first);
1581         AllocaSet.remove(Item.second);
1582       }
1583     }
1584     // Poison the allocas for which we didn't instrument the corresponding
1585     // lifetime intrinsics.
1586     for (AllocaInst *AI : AllocaSet)
1587       instrumentAlloca(*AI);
1588 
1589     // Insert shadow value checks.
1590     materializeChecks();
1591 
1592     // Delayed instrumentation of StoreInst.
1593     // This must not add new address checks; those were materialized above.
1594     materializeStores();
1595 
1596     return true;
1597   }
1598 
1599   /// Compute the shadow type that corresponds to a given Value.
1600   Type *getShadowTy(Value *V) { return getShadowTy(V->getType()); }
1601 
1602   /// Compute the shadow type that corresponds to a given Type.
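  ///
  /// For example, <4 x float> maps to <4 x i32> and {i32, float} maps to
  /// {i32, i32}; other sized, non-aggregate types map to an integer of the
  /// same bit width.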
1603   Type *getShadowTy(Type *OrigTy) {
1604     if (!OrigTy->isSized()) {
1605       return nullptr;
1606     }
1607     // For integer type, shadow is the same as the original type.
1608     // This may return weird-sized types like i1.
1609     if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
1610       return IT;
1611     const DataLayout &DL = F.getDataLayout();
1612     if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
1613       uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
1614       return VectorType::get(IntegerType::get(*MS.C, EltSize),
1615                              VT->getElementCount());
1616     }
1617     if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
1618       return ArrayType::get(getShadowTy(AT->getElementType()),
1619                             AT->getNumElements());
1620     }
1621     if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1622       SmallVector<Type *, 4> Elements;
1623       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1624         Elements.push_back(getShadowTy(ST->getElementType(i)));
1625       StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
1626       LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
1627       return Res;
1628     }
1629     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
1630     return IntegerType::get(*MS.C, TypeSize);
1631   }
1632 
1633   /// Extract combined shadow of struct elements as a bool
1634   Value *collapseStructShadow(StructType *Struct, Value *Shadow,
1635                               IRBuilder<> &IRB) {
1636     Value *FalseVal = IRB.getIntN(/* width */ 1, /* value */ 0);
1637     Value *Aggregator = FalseVal;
1638 
1639     for (unsigned Idx = 0; Idx < Struct->getNumElements(); Idx++) {
1640       // Combine by ORing together each element's bool shadow
1641       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1642       Value *ShadowBool = convertToBool(ShadowItem, IRB);
1643 
1644       if (Aggregator != FalseVal)
1645         Aggregator = IRB.CreateOr(Aggregator, ShadowBool);
1646       else
1647         Aggregator = ShadowBool;
1648     }
1649 
1650     return Aggregator;
1651   }
1652 
1653   // Extract combined shadow of array elements
1654   Value *collapseArrayShadow(ArrayType *Array, Value *Shadow,
1655                              IRBuilder<> &IRB) {
1656     if (!Array->getNumElements())
1657       return IRB.getIntN(/* width */ 1, /* value */ 0);
1658 
1659     Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
1660     Value *Aggregator = convertShadowToScalar(FirstItem, IRB);
1661 
1662     for (unsigned Idx = 1; Idx < Array->getNumElements(); Idx++) {
1663       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1664       Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
1665       Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
1666     }
1667     return Aggregator;
1668   }
1669 
1670   /// Convert a shadow value to its flattened variant. The resulting
1671   /// shadow may not necessarily have the same bit width as the input
1672   /// value, but it will always be comparable to zero.
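  ///
  /// For example, a <4 x i32> shadow is bitcast to i128, an aggregate shadow
  /// is collapsed element by element, and a scalable-vector shadow is
  /// OR-reduced to a single element first.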
1673   Value *convertShadowToScalar(Value *V, IRBuilder<> &IRB) {
1674     if (StructType *Struct = dyn_cast<StructType>(V->getType()))
1675       return collapseStructShadow(Struct, V, IRB);
1676     if (ArrayType *Array = dyn_cast<ArrayType>(V->getType()))
1677       return collapseArrayShadow(Array, V, IRB);
1678     if (isa<VectorType>(V->getType())) {
1679       if (isa<ScalableVectorType>(V->getType()))
1680         return convertShadowToScalar(IRB.CreateOrReduce(V), IRB);
1681       unsigned BitWidth =
1682           V->getType()->getPrimitiveSizeInBits().getFixedValue();
1683       return IRB.CreateBitCast(V, IntegerType::get(*MS.C, BitWidth));
1684     }
1685     return V;
1686   }
1687 
1688   // Convert a scalar value to an i1 by comparing with 0
1689   Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = "") {
1690     Type *VTy = V->getType();
1691     if (!VTy->isIntegerTy())
1692       return convertToBool(convertShadowToScalar(V, IRB), IRB, name);
1693     if (VTy->getIntegerBitWidth() == 1)
1694       // Just converting a bool to a bool, so do nothing.
1695       return V;
1696     return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), name);
1697   }
1698 
1699   Type *ptrToIntPtrType(Type *PtrTy) const {
1700     if (VectorType *VectTy = dyn_cast<VectorType>(PtrTy)) {
1701       return VectorType::get(ptrToIntPtrType(VectTy->getElementType()),
1702                              VectTy->getElementCount());
1703     }
1704     assert(PtrTy->isIntOrPtrTy());
1705     return MS.IntptrTy;
1706   }
1707 
1708   Type *getPtrToShadowPtrType(Type *IntPtrTy, Type *ShadowTy) const {
1709     if (VectorType *VectTy = dyn_cast<VectorType>(IntPtrTy)) {
1710       return VectorType::get(
1711           getPtrToShadowPtrType(VectTy->getElementType(), ShadowTy),
1712           VectTy->getElementCount());
1713     }
1714     assert(IntPtrTy == MS.IntptrTy);
1715     return MS.PtrTy;
1716   }
1717 
1718   Constant *constToIntPtr(Type *IntPtrTy, uint64_t C) const {
1719     if (VectorType *VectTy = dyn_cast<VectorType>(IntPtrTy)) {
1720       return ConstantVector::getSplat(
1721           VectTy->getElementCount(),
1722           constToIntPtr(VectTy->getElementType(), C));
1723     }
1724     assert(IntPtrTy == MS.IntptrTy);
1725     return ConstantInt::get(MS.IntptrTy, C);
1726   }
1727 
1728   /// Returns the integer shadow offset that corresponds to a given
1729   /// application address, whereby:
1730   ///
1731   ///     Offset = (Addr & ~AndMask) ^ XorMask
1732   ///     Shadow = ShadowBase + Offset
1733   ///     Origin = (OriginBase + Offset) & ~Alignment
1734   ///
1735   /// Note: for efficiency, many shadow mappings only use the XorMask
1736   ///       and OriginBase; the AndMask and ShadowBase are often zero.
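  ///       In the common case where AndMask is zero, the offset is simply
  ///       Addr ^ XorMask, so this function emits a single XOR.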
1737   Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1738     Type *IntptrTy = ptrToIntPtrType(Addr->getType());
1739     Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);
1740 
1741     if (uint64_t AndMask = MS.MapParams->AndMask)
1742       OffsetLong = IRB.CreateAnd(OffsetLong, constToIntPtr(IntptrTy, ~AndMask));
1743 
1744     if (uint64_t XorMask = MS.MapParams->XorMask)
1745       OffsetLong = IRB.CreateXor(OffsetLong, constToIntPtr(IntptrTy, XorMask));
1746     return OffsetLong;
1747   }
1748 
1749   /// Compute the shadow and origin addresses corresponding to a given
1750   /// application address.
1751   ///
1752   /// Shadow = ShadowBase + Offset
1753   /// Origin = (OriginBase + Offset) & ~3ULL
1754   /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy is the shadow type
1755   /// of a single pointee.
1756   /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
1757   std::pair<Value *, Value *>
1758   getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
1759                               MaybeAlign Alignment) {
1760     VectorType *VectTy = dyn_cast<VectorType>(Addr->getType());
1761     if (!VectTy) {
1762       assert(Addr->getType()->isPointerTy());
1763     } else {
1764       assert(VectTy->getElementType()->isPointerTy());
1765     }
1766     Type *IntptrTy = ptrToIntPtrType(Addr->getType());
1767     Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
1768     Value *ShadowLong = ShadowOffset;
1769     if (uint64_t ShadowBase = MS.MapParams->ShadowBase) {
1770       ShadowLong =
1771           IRB.CreateAdd(ShadowLong, constToIntPtr(IntptrTy, ShadowBase));
1772     }
1773     Value *ShadowPtr = IRB.CreateIntToPtr(
1774         ShadowLong, getPtrToShadowPtrType(IntptrTy, ShadowTy));
1775 
1776     Value *OriginPtr = nullptr;
1777     if (MS.TrackOrigins) {
1778       Value *OriginLong = ShadowOffset;
1779       uint64_t OriginBase = MS.MapParams->OriginBase;
1780       if (OriginBase != 0)
1781         OriginLong =
1782             IRB.CreateAdd(OriginLong, constToIntPtr(IntptrTy, OriginBase));
1783       if (!Alignment || *Alignment < kMinOriginAlignment) {
1784         uint64_t Mask = kMinOriginAlignment.value() - 1;
1785         OriginLong = IRB.CreateAnd(OriginLong, constToIntPtr(IntptrTy, ~Mask));
1786       }
1787       OriginPtr = IRB.CreateIntToPtr(
1788           OriginLong, getPtrToShadowPtrType(IntptrTy, MS.OriginTy));
1789     }
1790     return std::make_pair(ShadowPtr, OriginPtr);
1791   }
1792 
1793   template <typename... ArgsTy>
1794   Value *createMetadataCall(IRBuilder<> &IRB, FunctionCallee Callee,
1795                             ArgsTy... Args) {
1796     if (MS.TargetTriple.getArch() == Triple::systemz) {
1797       IRB.CreateCall(Callee,
1798                      {MS.MsanMetadataAlloca, std::forward<ArgsTy>(Args)...});
1799       return IRB.CreateLoad(MS.MsanMetadata, MS.MsanMetadataAlloca);
1800     }
1801 
1802     return IRB.CreateCall(Callee, {std::forward<ArgsTy>(Args)...});
1803   }
1804 
1805   std::pair<Value *, Value *> getShadowOriginPtrKernelNoVec(Value *Addr,
1806                                                             IRBuilder<> &IRB,
1807                                                             Type *ShadowTy,
1808                                                             bool isStore) {
1809     Value *ShadowOriginPtrs;
1810     const DataLayout &DL = F.getDataLayout();
1811     TypeSize Size = DL.getTypeStoreSize(ShadowTy);
1812 
1813     FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
1814     Value *AddrCast = IRB.CreatePointerCast(Addr, MS.PtrTy);
1815     if (Getter) {
1816       ShadowOriginPtrs = createMetadataCall(IRB, Getter, AddrCast);
1817     } else {
1818       Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
1819       ShadowOriginPtrs = createMetadataCall(
1820           IRB,
1821           isStore ? MS.MsanMetadataPtrForStoreN : MS.MsanMetadataPtrForLoadN,
1822           AddrCast, SizeVal);
1823     }
1824     Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
1825     ShadowPtr = IRB.CreatePointerCast(ShadowPtr, MS.PtrTy);
1826     Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
1827 
1828     return std::make_pair(ShadowPtr, OriginPtr);
1829   }
1830 
1831   /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy is the shadow type
1832   /// of a single pointee.
1833   /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
1834   std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
1835                                                        IRBuilder<> &IRB,
1836                                                        Type *ShadowTy,
1837                                                        bool isStore) {
1838     VectorType *VectTy = dyn_cast<VectorType>(Addr->getType());
1839     if (!VectTy) {
1840       assert(Addr->getType()->isPointerTy());
1841       return getShadowOriginPtrKernelNoVec(Addr, IRB, ShadowTy, isStore);
1842     }
1843 
1844     // TODO: Support callbacks with vectors of addresses.
1845     unsigned NumElements = cast<FixedVectorType>(VectTy)->getNumElements();
1846     Value *ShadowPtrs = ConstantInt::getNullValue(
1847         FixedVectorType::get(IRB.getPtrTy(), NumElements));
1848     Value *OriginPtrs = nullptr;
1849     if (MS.TrackOrigins)
1850       OriginPtrs = ConstantInt::getNullValue(
1851           FixedVectorType::get(IRB.getPtrTy(), NumElements));
1852     for (unsigned i = 0; i < NumElements; ++i) {
1853       Value *OneAddr =
1854           IRB.CreateExtractElement(Addr, ConstantInt::get(IRB.getInt32Ty(), i));
1855       auto [ShadowPtr, OriginPtr] =
1856           getShadowOriginPtrKernelNoVec(OneAddr, IRB, ShadowTy, isStore);
1857 
1858       ShadowPtrs = IRB.CreateInsertElement(
1859           ShadowPtrs, ShadowPtr, ConstantInt::get(IRB.getInt32Ty(), i));
1860       if (MS.TrackOrigins)
1861         OriginPtrs = IRB.CreateInsertElement(
1862             OriginPtrs, OriginPtr, ConstantInt::get(IRB.getInt32Ty(), i));
1863     }
1864     return {ShadowPtrs, OriginPtrs};
1865   }
1866 
1867   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1868                                                  Type *ShadowTy,
1869                                                  MaybeAlign Alignment,
1870                                                  bool isStore) {
1871     if (MS.CompileKernel)
1872       return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
1873     return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1874   }
1875 
1876   /// Compute the shadow address for a given function argument.
1877   ///
1878   /// Shadow = ParamTLS+ArgOffset.
1879   Value *getShadowPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
1880     Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
1881     if (ArgOffset)
1882       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1883     return IRB.CreateIntToPtr(Base, IRB.getPtrTy(0), "_msarg");
1884   }
1885 
1886   /// Compute the origin address for a given function argument.
1887   Value *getOriginPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
1888     if (!MS.TrackOrigins)
1889       return nullptr;
1890     Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
1891     if (ArgOffset)
1892       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1893     return IRB.CreateIntToPtr(Base, IRB.getPtrTy(0), "_msarg_o");
1894   }
1895 
1896   /// Compute the shadow address for a retval.
1897   Value *getShadowPtrForRetval(IRBuilder<> &IRB) {
1898     return IRB.CreatePointerCast(MS.RetvalTLS, IRB.getPtrTy(0), "_msret");
1899   }
1900 
1901   /// Compute the origin address for a retval.
1902   Value *getOriginPtrForRetval() {
1903     // We keep a single origin for the entire retval. Might be too optimistic.
1904     return MS.RetvalOriginTLS;
1905   }
1906 
1907   /// Set SV to be the shadow value for V.
1908   void setShadow(Value *V, Value *SV) {
1909     assert(!ShadowMap.count(V) && "Values may only have one shadow");
1910     ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
1911   }
1912 
1913   /// Set Origin to be the origin value for V.
1914   void setOrigin(Value *V, Value *Origin) {
1915     if (!MS.TrackOrigins)
1916       return;
1917     assert(!OriginMap.count(V) && "Values may only have one origin");
1918     LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
1919     OriginMap[V] = Origin;
1920   }
1921 
1922   Constant *getCleanShadow(Type *OrigTy) {
1923     Type *ShadowTy = getShadowTy(OrigTy);
1924     if (!ShadowTy)
1925       return nullptr;
1926     return Constant::getNullValue(ShadowTy);
1927   }
1928 
1929   /// Create a clean shadow value for a given value.
1930   ///
1931   /// Clean shadow (all zeroes) means all bits of the value are defined
1932   /// (initialized).
1933   Constant *getCleanShadow(Value *V) { return getCleanShadow(V->getType()); }
1934 
1935   /// Create a dirty shadow of a given shadow type.
1936   Constant *getPoisonedShadow(Type *ShadowTy) {
1937     assert(ShadowTy);
1938     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1939       return Constant::getAllOnesValue(ShadowTy);
1940     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1941       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1942                                       getPoisonedShadow(AT->getElementType()));
1943       return ConstantArray::get(AT, Vals);
1944     }
1945     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1946       SmallVector<Constant *, 4> Vals;
1947       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1948         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1949       return ConstantStruct::get(ST, Vals);
1950     }
1951     llvm_unreachable("Unexpected shadow type");
1952   }
1953 
1954   /// Create a dirty shadow for a given value.
1955   Constant *getPoisonedShadow(Value *V) {
1956     Type *ShadowTy = getShadowTy(V);
1957     if (!ShadowTy)
1958       return nullptr;
1959     return getPoisonedShadow(ShadowTy);
1960   }
1961 
1962   /// Create a clean (zero) origin.
1963   Value *getCleanOrigin() { return Constant::getNullValue(MS.OriginTy); }
1964 
1965   /// Get the shadow value for a given Value.
1966   ///
1967   /// This function either returns the value set earlier with setShadow,
1968   /// or extracts it from ParamTLS (for function arguments).
1969   Value *getShadow(Value *V) {
1970     if (Instruction *I = dyn_cast<Instruction>(V)) {
1971       if (!PropagateShadow || I->getMetadata(LLVMContext::MD_nosanitize))
1972         return getCleanShadow(V);
1973       // For instructions the shadow is already stored in the map.
1974       Value *Shadow = ShadowMap[V];
1975       if (!Shadow) {
1976         LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
1977         (void)I;
1978         assert(Shadow && "No shadow for a value");
1979       }
1980       return Shadow;
1981     }
1982     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
1983       Value *AllOnes = (PropagateShadow && PoisonUndef) ? getPoisonedShadow(V)
1984                                                         : getCleanShadow(V);
1985       LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
1986       (void)U;
1987       return AllOnes;
1988     }
1989     if (Argument *A = dyn_cast<Argument>(V)) {
1990       // For arguments we compute the shadow on demand and store it in the map.
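      // Argument shadows live at consecutive, kShadowTLSAlignment-aligned
      // offsets in the parameter TLS block; walk the formal arguments to find
      // the offset belonging to this one.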
1991       Value *&ShadowPtr = ShadowMap[V];
1992       if (ShadowPtr)
1993         return ShadowPtr;
1994       Function *F = A->getParent();
1995       IRBuilder<> EntryIRB(FnPrologueEnd);
1996       unsigned ArgOffset = 0;
1997       const DataLayout &DL = F->getDataLayout();
1998       for (auto &FArg : F->args()) {
1999         if (!FArg.getType()->isSized() || FArg.getType()->isScalableTy()) {
2000           LLVM_DEBUG(dbgs() << (FArg.getType()->isScalableTy()
2001                                     ? "vscale not fully supported\n"
2002                                     : "Arg is not sized\n"));
2003           if (A == &FArg) {
2004             ShadowPtr = getCleanShadow(V);
2005             setOrigin(A, getCleanOrigin());
2006             break;
2007           }
2008           continue;
2009         }
2010 
2011         unsigned Size = FArg.hasByValAttr()
2012                             ? DL.getTypeAllocSize(FArg.getParamByValType())
2013                             : DL.getTypeAllocSize(FArg.getType());
2014 
2015         if (A == &FArg) {
2016           bool Overflow = ArgOffset + Size > kParamTLSSize;
2017           if (FArg.hasByValAttr()) {
2018             // ByVal pointer itself has clean shadow. We copy the actual
2019             // argument shadow to the underlying memory.
2020             // Figure out maximal valid memcpy alignment.
2021             const Align ArgAlign = DL.getValueOrABITypeAlignment(
2022                 FArg.getParamAlign(), FArg.getParamByValType());
2023             Value *CpShadowPtr, *CpOriginPtr;
2024             std::tie(CpShadowPtr, CpOriginPtr) =
2025                 getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
2026                                    /*isStore*/ true);
2027             if (!PropagateShadow || Overflow) {
2028               // ParamTLS overflow.
2029               EntryIRB.CreateMemSet(
2030                   CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
2031                   Size, ArgAlign);
2032             } else {
2033               Value *Base = getShadowPtrForArgument(EntryIRB, ArgOffset);
2034               const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
2035               Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
2036                                                  CopyAlign, Size);
2037               LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
2038               (void)Cpy;
2039 
2040               if (MS.TrackOrigins) {
2041                 Value *OriginPtr = getOriginPtrForArgument(EntryIRB, ArgOffset);
2042                 // FIXME: OriginSize should be:
2043                 // alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment)
2044                 unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
2045                 EntryIRB.CreateMemCpy(
2046                     CpOriginPtr,
2047                     /* by getShadowOriginPtr */ kMinOriginAlignment, OriginPtr,
2048                     /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
2049                     OriginSize);
2050               }
2051             }
2052           }
2053 
2054           if (!PropagateShadow || Overflow || FArg.hasByValAttr() ||
2055               (MS.EagerChecks && FArg.hasAttribute(Attribute::NoUndef))) {
2056             ShadowPtr = getCleanShadow(V);
2057             setOrigin(A, getCleanOrigin());
2058           } else {
2059             // Shadow over TLS
2060             Value *Base = getShadowPtrForArgument(EntryIRB, ArgOffset);
2061             ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
2062                                                    kShadowTLSAlignment);
2063             if (MS.TrackOrigins) {
2064               Value *OriginPtr = getOriginPtrForArgument(EntryIRB, ArgOffset);
2065               setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
2066             }
2067           }
2068           LLVM_DEBUG(dbgs()
2069                      << "  ARG:    " << FArg << " ==> " << *ShadowPtr << "\n");
2070           break;
2071         }
2072 
2073         ArgOffset += alignTo(Size, kShadowTLSAlignment);
2074       }
2075       assert(ShadowPtr && "Could not find shadow for an argument");
2076       return ShadowPtr;
2077     }
2078     // For everything else the shadow is zero.
2079     return getCleanShadow(V);
2080   }
2081 
2082   /// Get the shadow for i-th argument of the instruction I.
2083   Value *getShadow(Instruction *I, int i) {
2084     return getShadow(I->getOperand(i));
2085   }
2086 
2087   /// Get the origin for a value.
2088   Value *getOrigin(Value *V) {
2089     if (!MS.TrackOrigins)
2090       return nullptr;
2091     if (!PropagateShadow || isa<Constant>(V) || isa<InlineAsm>(V))
2092       return getCleanOrigin();
2093     assert((isa<Instruction>(V) || isa<Argument>(V)) &&
2094            "Unexpected value type in getOrigin()");
2095     if (Instruction *I = dyn_cast<Instruction>(V)) {
2096       if (I->getMetadata(LLVMContext::MD_nosanitize))
2097         return getCleanOrigin();
2098     }
2099     Value *Origin = OriginMap[V];
2100     assert(Origin && "Missing origin");
2101     return Origin;
2102   }
2103 
2104   /// Get the origin for i-th argument of the instruction I.
2105   Value *getOrigin(Instruction *I, int i) {
2106     return getOrigin(I->getOperand(i));
2107   }
2108 
2109   /// Remember the place where a shadow check should be inserted.
2110   ///
2111   /// This location will be later instrumented with a check that will print a
2112   /// UMR warning at runtime if the shadow value is not 0.
2113   void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
2114     assert(Shadow);
2115     if (!InsertChecks)
2116       return;
2117 
2118     if (!DebugCounter::shouldExecute(DebugInsertCheck)) {
2119       LLVM_DEBUG(dbgs() << "Skipping check of " << *Shadow << " before "
2120                         << *OrigIns << "\n");
2121       return;
2122     }
2123 #ifndef NDEBUG
2124     Type *ShadowTy = Shadow->getType();
2125     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
2126             isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
2127            "Can only insert checks for integer, vector, and aggregate shadow "
2128            "types");
2129 #endif
2130     InstrumentationList.push_back(
2131         ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
2132   }
2133 
2134   /// Remember the place where a shadow check should be inserted.
2135   ///
2136   /// This location will be later instrumented with a check that will print a
2137   /// UMR warning at runtime if the value is not fully defined.
2138   void insertShadowCheck(Value *Val, Instruction *OrigIns) {
2139     assert(Val);
2140     Value *Shadow, *Origin;
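    // With ClCheckConstantShadow we also check constant (compile-time known)
    // shadows; otherwise only shadows that are themselves instructions are
    // considered.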
2141     if (ClCheckConstantShadow) {
2142       Shadow = getShadow(Val);
2143       if (!Shadow)
2144         return;
2145       Origin = getOrigin(Val);
2146     } else {
2147       Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
2148       if (!Shadow)
2149         return;
2150       Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
2151     }
2152     insertShadowCheck(Shadow, Origin, OrigIns);
2153   }
2154 
2155   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
2156     switch (a) {
2157     case AtomicOrdering::NotAtomic:
2158       return AtomicOrdering::NotAtomic;
2159     case AtomicOrdering::Unordered:
2160     case AtomicOrdering::Monotonic:
2161     case AtomicOrdering::Release:
2162       return AtomicOrdering::Release;
2163     case AtomicOrdering::Acquire:
2164     case AtomicOrdering::AcquireRelease:
2165       return AtomicOrdering::AcquireRelease;
2166     case AtomicOrdering::SequentiallyConsistent:
2167       return AtomicOrdering::SequentiallyConsistent;
2168     }
2169     llvm_unreachable("Unknown ordering");
2170   }
2171 
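  // Builds a constant table that maps each C ABI memory ordering to a
  // strengthened one, analogous to addReleaseOrdering above: relaxed and
  // release map to release, consume/acquire/acq_rel map to acq_rel, and
  // seq_cst stays seq_cst.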
2172   Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
2173     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
2174     uint32_t OrderingTable[NumOrderings] = {};
2175 
2176     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
2177         OrderingTable[(int)AtomicOrderingCABI::release] =
2178             (int)AtomicOrderingCABI::release;
2179     OrderingTable[(int)AtomicOrderingCABI::consume] =
2180         OrderingTable[(int)AtomicOrderingCABI::acquire] =
2181             OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
2182                 (int)AtomicOrderingCABI::acq_rel;
2183     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
2184         (int)AtomicOrderingCABI::seq_cst;
2185 
2186     return ConstantDataVector::get(IRB.getContext(), OrderingTable);
2187   }
2188 
2189   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
2190     switch (a) {
2191     case AtomicOrdering::NotAtomic:
2192       return AtomicOrdering::NotAtomic;
2193     case AtomicOrdering::Unordered:
2194     case AtomicOrdering::Monotonic:
2195     case AtomicOrdering::Acquire:
2196       return AtomicOrdering::Acquire;
2197     case AtomicOrdering::Release:
2198     case AtomicOrdering::AcquireRelease:
2199       return AtomicOrdering::AcquireRelease;
2200     case AtomicOrdering::SequentiallyConsistent:
2201       return AtomicOrdering::SequentiallyConsistent;
2202     }
2203     llvm_unreachable("Unknown ordering");
2204   }
2205 
2206   Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
2207     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
2208     uint32_t OrderingTable[NumOrderings] = {};
2209 
2210     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
2211         OrderingTable[(int)AtomicOrderingCABI::acquire] =
2212             OrderingTable[(int)AtomicOrderingCABI::consume] =
2213                 (int)AtomicOrderingCABI::acquire;
2214     OrderingTable[(int)AtomicOrderingCABI::release] =
2215         OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
2216             (int)AtomicOrderingCABI::acq_rel;
2217     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
2218         (int)AtomicOrderingCABI::seq_cst;
2219 
2220     return ConstantDataVector::get(IRB.getContext(), OrderingTable);
2221   }
2222 
2223   // ------------------- Visitors.
2224   using InstVisitor<MemorySanitizerVisitor>::visit;
2225   void visit(Instruction &I) {
2226     if (I.getMetadata(LLVMContext::MD_nosanitize))
2227       return;
2228     // Don't want to visit if we're in the prologue
2229     if (isInPrologue(I))
2230       return;
2231     if (!DebugCounter::shouldExecute(DebugInstrumentInstruction)) {
2232       LLVM_DEBUG(dbgs() << "Skipping instruction: " << I << "\n");
2233       // We still need to set the shadow and origin to clean values.
2234       setShadow(&I, getCleanShadow(&I));
2235       setOrigin(&I, getCleanOrigin());
2236       return;
2237     }
2238 
2239     Instructions.push_back(&I);
2240   }
2241 
2242   /// Instrument LoadInst
2243   ///
2244   /// Loads the corresponding shadow and (optionally) origin.
2245   /// Optionally, checks that the load address is fully defined.
2246   void visitLoadInst(LoadInst &I) {
2247     assert(I.getType()->isSized() && "Load type must have size");
2248     assert(!I.getMetadata(LLVMContext::MD_nosanitize));
2249     NextNodeIRBuilder IRB(&I);
2250     Type *ShadowTy = getShadowTy(&I);
2251     Value *Addr = I.getPointerOperand();
2252     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2253     const Align Alignment = I.getAlign();
2254     if (PropagateShadow) {
2255       std::tie(ShadowPtr, OriginPtr) =
2256           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2257       setShadow(&I,
2258                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2259     } else {
2260       setShadow(&I, getCleanShadow(&I));
2261     }
2262 
2263     if (ClCheckAccessAddress)
2264       insertShadowCheck(I.getPointerOperand(), &I);
2265 
2266     if (I.isAtomic())
2267       I.setOrdering(addAcquireOrdering(I.getOrdering()));
2268 
2269     if (MS.TrackOrigins) {
2270       if (PropagateShadow) {
2271         const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
2272         setOrigin(
2273             &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
2274       } else {
2275         setOrigin(&I, getCleanOrigin());
2276       }
2277     }
2278   }
2279 
2280   /// Instrument StoreInst
2281   ///
2282   /// Stores the corresponding shadow and (optionally) origin.
2283   /// Optionally, checks that the store address is fully defined.
2284   void visitStoreInst(StoreInst &I) {
2285     StoreList.push_back(&I);
2286     if (ClCheckAccessAddress)
2287       insertShadowCheck(I.getPointerOperand(), &I);
2288   }
2289 
2290   void handleCASOrRMW(Instruction &I) {
2291     assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
2292 
2293     IRBuilder<> IRB(&I);
2294     Value *Addr = I.getOperand(0);
2295     Value *Val = I.getOperand(1);
2296     Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, getShadowTy(Val), Align(1),
2297                                           /*isStore*/ true)
2298                            .first;
2299 
2300     if (ClCheckAccessAddress)
2301       insertShadowCheck(Addr, &I);
2302 
2303     // Only test the compare argument of a cmpxchg instruction.
2304     // The other argument can potentially be uninitialized, but we cannot
2305     // detect this situation reliably without risking false positives.
2306     if (isa<AtomicCmpXchgInst>(I))
2307       insertShadowCheck(Val, &I);
2308 
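    // MSan does not propagate shadow through atomic RMW/CAS; conservatively
    // mark the updated memory as initialized by storing a clean shadow.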
2309     IRB.CreateStore(getCleanShadow(Val), ShadowPtr);
2310 
2311     setShadow(&I, getCleanShadow(&I));
2312     setOrigin(&I, getCleanOrigin());
2313   }
2314 
2315   void visitAtomicRMWInst(AtomicRMWInst &I) {
2316     handleCASOrRMW(I);
2317     I.setOrdering(addReleaseOrdering(I.getOrdering()));
2318   }
2319 
2320   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
2321     handleCASOrRMW(I);
2322     I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
2323   }
2324 
2325   // Vector manipulation.
2326   void visitExtractElementInst(ExtractElementInst &I) {
2327     insertShadowCheck(I.getOperand(1), &I);
2328     IRBuilder<> IRB(&I);
2329     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
2330                                            "_msprop"));
2331     setOrigin(&I, getOrigin(&I, 0));
2332   }
2333 
2334   void visitInsertElementInst(InsertElementInst &I) {
2335     insertShadowCheck(I.getOperand(2), &I);
2336     IRBuilder<> IRB(&I);
2337     auto *Shadow0 = getShadow(&I, 0);
2338     auto *Shadow1 = getShadow(&I, 1);
2339     setShadow(&I, IRB.CreateInsertElement(Shadow0, Shadow1, I.getOperand(2),
2340                                           "_msprop"));
2341     setOriginForNaryOp(I);
2342   }
2343 
2344   void visitShuffleVectorInst(ShuffleVectorInst &I) {
2345     IRBuilder<> IRB(&I);
2346     auto *Shadow0 = getShadow(&I, 0);
2347     auto *Shadow1 = getShadow(&I, 1);
2348     setShadow(&I, IRB.CreateShuffleVector(Shadow0, Shadow1, I.getShuffleMask(),
2349                                           "_msprop"));
2350     setOriginForNaryOp(I);
2351   }
2352 
2353   // Casts.
2354   void visitSExtInst(SExtInst &I) {
2355     IRBuilder<> IRB(&I);
2356     setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
2357     setOrigin(&I, getOrigin(&I, 0));
2358   }
2359 
2360   void visitZExtInst(ZExtInst &I) {
2361     IRBuilder<> IRB(&I);
2362     setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
2363     setOrigin(&I, getOrigin(&I, 0));
2364   }
2365 
2366   void visitTruncInst(TruncInst &I) {
2367     IRBuilder<> IRB(&I);
2368     setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
2369     setOrigin(&I, getOrigin(&I, 0));
2370   }
2371 
2372   void visitBitCastInst(BitCastInst &I) {
2373     // Special case: if this is the bitcast (there is exactly 1 allowed) between
2374     // a musttail call and a ret, don't instrument. New instructions are not
2375     // allowed after a musttail call.
2376     if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
2377       if (CI->isMustTailCall())
2378         return;
2379     IRBuilder<> IRB(&I);
2380     setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
2381     setOrigin(&I, getOrigin(&I, 0));
2382   }
2383 
2384   void visitPtrToIntInst(PtrToIntInst &I) {
2385     IRBuilder<> IRB(&I);
2386     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2387                                     "_msprop_ptrtoint"));
2388     setOrigin(&I, getOrigin(&I, 0));
2389   }
2390 
2391   void visitIntToPtrInst(IntToPtrInst &I) {
2392     IRBuilder<> IRB(&I);
2393     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2394                                     "_msprop_inttoptr"));
2395     setOrigin(&I, getOrigin(&I, 0));
2396   }
2397 
2398   void visitFPToSIInst(CastInst &I) { handleShadowOr(I); }
2399   void visitFPToUIInst(CastInst &I) { handleShadowOr(I); }
2400   void visitSIToFPInst(CastInst &I) { handleShadowOr(I); }
2401   void visitUIToFPInst(CastInst &I) { handleShadowOr(I); }
2402   void visitFPExtInst(CastInst &I) { handleShadowOr(I); }
2403   void visitFPTruncInst(CastInst &I) { handleShadowOr(I); }
2404 
2405   /// Propagate shadow for bitwise AND.
2406   ///
2407   /// This code is exact, i.e. if, for example, a bit in the left argument
2408   /// is defined and 0, then neither the value nor the definedness of the
2409   /// corresponding bit in B affects the resulting shadow.
2410   void visitAnd(BinaryOperator &I) {
2411     IRBuilder<> IRB(&I);
2412     //  "And" of 0 and a poisoned value results in an unpoisoned value.
2413     //  1&1 => 1;     0&1 => 0;     p&1 => p;
2414     //  1&0 => 0;     0&0 => 0;     p&0 => 0;
2415     //  1&p => p;     0&p => 0;     p&p => p;
2416     //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
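    //  For example, if the second operand is a fully defined 0 (V2 == 0 and
    //  S2 == 0), all three terms vanish and the result is fully defined.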
2417     Value *S1 = getShadow(&I, 0);
2418     Value *S2 = getShadow(&I, 1);
2419     Value *V1 = I.getOperand(0);
2420     Value *V2 = I.getOperand(1);
2421     if (V1->getType() != S1->getType()) {
2422       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2423       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2424     }
2425     Value *S1S2 = IRB.CreateAnd(S1, S2);
2426     Value *V1S2 = IRB.CreateAnd(V1, S2);
2427     Value *S1V2 = IRB.CreateAnd(S1, V2);
2428     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2429     setOriginForNaryOp(I);
2430   }
2431 
2432   void visitOr(BinaryOperator &I) {
2433     IRBuilder<> IRB(&I);
2434     //  "Or" of 1 and a poisoned value results in an unpoisoned value.
2435     //  1|1 => 1;     0|1 => 1;     p|1 => 1;
2436     //  1|0 => 1;     0|0 => 0;     p|0 => p;
2437     //  1|p => 1;     0|p => p;     p|p => p;
2438     //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
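    //  For example, if the second operand is a fully defined all-ones value
    //  (S2 == 0 and ~V2 == 0), all three terms vanish and the result is defined.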
2439     Value *S1 = getShadow(&I, 0);
2440     Value *S2 = getShadow(&I, 1);
2441     Value *V1 = IRB.CreateNot(I.getOperand(0));
2442     Value *V2 = IRB.CreateNot(I.getOperand(1));
2443     if (V1->getType() != S1->getType()) {
2444       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2445       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2446     }
2447     Value *S1S2 = IRB.CreateAnd(S1, S2);
2448     Value *V1S2 = IRB.CreateAnd(V1, S2);
2449     Value *S1V2 = IRB.CreateAnd(S1, V2);
2450     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2451     setOriginForNaryOp(I);
2452   }
2453 
2454   /// Default propagation of shadow and/or origin.
2455   ///
2456   /// This class implements the general case of shadow propagation, used in all
2457   /// cases where we don't know and/or don't care about what the operation
2458   /// actually does. It converts all input shadow values to a common type
2459   /// (extending or truncating as necessary), and bitwise OR's them.
2460   ///
2461   /// This is much cheaper than inserting checks (i.e. requiring inputs to be
2462   /// fully initialized), and less prone to false positives.
2463   ///
2464   /// This class also implements the general case of origin propagation. For a
2465   /// Nary operation, result origin is set to the origin of an argument that is
2466   /// not entirely initialized. If there is more than one such argument, the
2467   /// rightmost of them is picked. It does not matter which one is picked if all
2468   /// arguments are initialized.
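  ///
  /// For example, for C = A + B where only B has a non-zero shadow, the origin
  /// of C becomes the origin of B.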
2469   template <bool CombineShadow> class Combiner {
2470     Value *Shadow = nullptr;
2471     Value *Origin = nullptr;
2472     IRBuilder<> &IRB;
2473     MemorySanitizerVisitor *MSV;
2474 
2475   public:
2476     Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
2477         : IRB(IRB), MSV(MSV) {}
2478 
2479     /// Add a pair of shadow and origin values to the mix.
2480     Combiner &Add(Value *OpShadow, Value *OpOrigin) {
2481       if (CombineShadow) {
2482         assert(OpShadow);
2483         if (!Shadow)
2484           Shadow = OpShadow;
2485         else {
2486           OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
2487           Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
2488         }
2489       }
2490 
2491       if (MSV->MS.TrackOrigins) {
2492         assert(OpOrigin);
2493         if (!Origin) {
2494           Origin = OpOrigin;
2495         } else {
2496           Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
2497           // No point in adding something that might result in 0 origin value.
2498           if (!ConstOrigin || !ConstOrigin->isNullValue()) {
2499             Value *Cond = MSV->convertToBool(OpShadow, IRB);
2500             Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2501           }
2502         }
2503       }
2504       return *this;
2505     }
2506 
2507     /// Add an application value to the mix.
2508     Combiner &Add(Value *V) {
2509       Value *OpShadow = MSV->getShadow(V);
2510       Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
2511       return Add(OpShadow, OpOrigin);
2512     }
2513 
2514     /// Set the current combined values as the given instruction's shadow
2515     /// and origin.
2516     void Done(Instruction *I) {
2517       if (CombineShadow) {
2518         assert(Shadow);
2519         Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
2520         MSV->setShadow(I, Shadow);
2521       }
2522       if (MSV->MS.TrackOrigins) {
2523         assert(Origin);
2524         MSV->setOrigin(I, Origin);
2525       }
2526     }
2527 
2528     /// Store the current combined value at the specified origin
2529     /// location.
2530     void DoneAndStoreOrigin(TypeSize TS, Value *OriginPtr) {
2531       if (MSV->MS.TrackOrigins) {
2532         assert(Origin);
2533         MSV->paintOrigin(IRB, Origin, OriginPtr, TS, kMinOriginAlignment);
2534       }
2535     }
2536   };
2537 
2538   using ShadowAndOriginCombiner = Combiner<true>;
2539   using OriginCombiner = Combiner<false>;
2540 
2541   /// Propagate origin for arbitrary operation.
2542   void setOriginForNaryOp(Instruction &I) {
2543     if (!MS.TrackOrigins)
2544       return;
2545     IRBuilder<> IRB(&I);
2546     OriginCombiner OC(this, IRB);
2547     for (Use &Op : I.operands())
2548       OC.Add(Op.get());
2549     OC.Done(&I);
2550   }
2551 
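  // Total bit width of a fixed-width vector or primitive type; used by
  // CreateShadowCast below to pick the integer types it bitcasts through.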
2552   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2553     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2554            "Vector of pointers is not a valid shadow type");
2555     return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getNumElements() *
2556                                   Ty->getScalarSizeInBits()
2557                             : Ty->getPrimitiveSizeInBits();
2558   }
2559 
2560   /// Cast between two shadow types, extending or truncating as
2561   /// necessary.
2562   Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
2563                           bool Signed = false) {
2564     Type *srcTy = V->getType();
2565     if (srcTy == dstTy)
2566       return V;
2567     size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
2568     size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
2569     if (srcSizeInBits > 1 && dstSizeInBits == 1)
2570       return IRB.CreateICmpNE(V, getCleanShadow(V));
2571 
2572     if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
2573       return IRB.CreateIntCast(V, dstTy, Signed);
2574     if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
2575         cast<VectorType>(dstTy)->getElementCount() ==
2576             cast<VectorType>(srcTy)->getElementCount())
2577       return IRB.CreateIntCast(V, dstTy, Signed);
2578     Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
2579     Value *V2 =
2580         IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
2581     return IRB.CreateBitCast(V2, dstTy);
2582     // TODO: handle struct types.
2583   }
2584 
2585   /// Cast an application value to the type of its own shadow.
2586   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2587     Type *ShadowTy = getShadowTy(V);
2588     if (V->getType() == ShadowTy)
2589       return V;
2590     if (V->getType()->isPtrOrPtrVectorTy())
2591       return IRB.CreatePtrToInt(V, ShadowTy);
2592     else
2593       return IRB.CreateBitCast(V, ShadowTy);
2594   }
2595 
2596   /// Propagate shadow for arbitrary operation.
2597   void handleShadowOr(Instruction &I) {
2598     IRBuilder<> IRB(&I);
2599     ShadowAndOriginCombiner SC(this, IRB);
2600     for (Use &Op : I.operands())
2601       SC.Add(Op.get());
2602     SC.Done(&I);
2603   }
2604 
2605   void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
2606 
2607   // Handle multiplication by constant.
2608   //
2609   // Handle a special case of multiplication by constant that may have one or
2610   // more zeros in the lower bits. This makes the corresponding number of lower
2611   // bits of the result zero as well. We model it by shifting the other operand
2612   // shadow left by the required number of bits. Effectively, we transform
2613   // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
2614   // We use multiplication by 2**N instead of shift to cover the case of
2615   // multiplication by 0, which may occur in some elements of a vector operand.
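  //
  // For example, for X * 24 (24 == 3 * 2**3) the three lowest bits of the
  // result are always zero, so the shadow of the result is Sx * 8, i.e. the
  // shadow of X shifted left by 3 bits.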
2616   void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
2617                            Value *OtherArg) {
2618     Constant *ShadowMul;
2619     Type *Ty = ConstArg->getType();
2620     if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2621       unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements();
2622       Type *EltTy = VTy->getElementType();
2623       SmallVector<Constant *, 16> Elements;
2624       for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
2625         if (ConstantInt *Elt =
2626                 dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
2627           const APInt &V = Elt->getValue();
2628           APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
2629           Elements.push_back(ConstantInt::get(EltTy, V2));
2630         } else {
2631           Elements.push_back(ConstantInt::get(EltTy, 1));
2632         }
2633       }
2634       ShadowMul = ConstantVector::get(Elements);
2635     } else {
2636       if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
2637         const APInt &V = Elt->getValue();
2638         APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
2639         ShadowMul = ConstantInt::get(Ty, V2);
2640       } else {
2641         ShadowMul = ConstantInt::get(Ty, 1);
2642       }
2643     }
2644 
2645     IRBuilder<> IRB(&I);
2646     setShadow(&I,
2647               IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
2648     setOrigin(&I, getOrigin(OtherArg));
2649   }
2650 
2651   void visitMul(BinaryOperator &I) {
2652     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
2653     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
2654     if (constOp0 && !constOp1)
2655       handleMulByConstant(I, constOp0, I.getOperand(1));
2656     else if (constOp1 && !constOp0)
2657       handleMulByConstant(I, constOp1, I.getOperand(0));
2658     else
2659       handleShadowOr(I);
2660   }
2661 
2662   void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
2663   void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
2664   void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
2665   void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
2666   void visitSub(BinaryOperator &I) { handleShadowOr(I); }
2667   void visitXor(BinaryOperator &I) { handleShadowOr(I); }
2668 
2669   void handleIntegerDiv(Instruction &I) {
2670     IRBuilder<> IRB(&I);
2671     // Strict on the second argument.
2672     insertShadowCheck(I.getOperand(1), &I);
2673     setShadow(&I, getShadow(&I, 0));
2674     setOrigin(&I, getOrigin(&I, 0));
2675   }
2676 
2677   void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2678   void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2679   void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
2680   void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
2681 
2682   // Floating-point division is side-effect free, so we cannot require that the
2683   // divisor is fully initialized; instead we must propagate shadow. See PR37523.
2684   void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
2685   void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
2686 
2687   /// Instrument == and != comparisons.
2688   ///
2689   /// Sometimes the comparison result is known even if some of the bits of the
2690   /// arguments are not.
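       /// For example, with 16-bit values, A = 0x01?? (low byte uninitialized) and
       /// B = 0x0000 are known to be unequal regardless of the uninitialized bits,
       /// because A ^ B has a defined 1 bit.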
2691   void handleEqualityComparison(ICmpInst &I) {
2692     IRBuilder<> IRB(&I);
2693     Value *A = I.getOperand(0);
2694     Value *B = I.getOperand(1);
2695     Value *Sa = getShadow(A);
2696     Value *Sb = getShadow(B);
2697 
2698     // Get rid of pointers and vectors of pointers.
2699     // For ints (and vectors of ints), types of A and Sa match,
2700     // and this is a no-op.
2701     A = IRB.CreatePointerCast(A, Sa->getType());
2702     B = IRB.CreatePointerCast(B, Sb->getType());
2703 
2704     // A == B  <==>  (C = A^B) == 0
2705     // A != B  <==>  (C = A^B) != 0
2706     // Sc = Sa | Sb
2707     Value *C = IRB.CreateXor(A, B);
2708     Value *Sc = IRB.CreateOr(Sa, Sb);
2709     // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
2710     // Result is defined if one of the following is true
2711     // * there is a defined 1 bit in C
2712     // * C is fully defined
2713     // Si = !(C & ~Sc) && Sc
2714     Value *Zero = Constant::getNullValue(Sc->getType());
2715     Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
2716     Value *LHS = IRB.CreateICmpNE(Sc, Zero);
2717     Value *RHS =
2718         IRB.CreateICmpEQ(IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero);
2719     Value *Si = IRB.CreateAnd(LHS, RHS);
2720     Si->setName("_msprop_icmp");
2721     setShadow(&I, Si);
2722     setOriginForNaryOp(I);
2723   }
2724 
2725   /// Instrument relational comparisons.
2726   ///
2727   /// This function does exact shadow propagation for all relational
2728   /// comparisons of integers, pointers and vectors of those.
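       /// For example (unsigned, 4-bit): if A = 0b1??? (so A lies in [0b1000, 0b1111])
       /// and B is the constant 0b0100, then A > B holds for every possible value of
       /// the undefined bits, so the result of the comparison is fully defined.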
2729   /// FIXME: output seems suboptimal when one of the operands is a constant
2730   void handleRelationalComparisonExact(ICmpInst &I) {
2731     IRBuilder<> IRB(&I);
2732     Value *A = I.getOperand(0);
2733     Value *B = I.getOperand(1);
2734     Value *Sa = getShadow(A);
2735     Value *Sb = getShadow(B);
2736 
2737     // Get rid of pointers and vectors of pointers.
2738     // For ints (and vectors of ints), types of A and Sa match,
2739     // and this is a no-op.
2740     A = IRB.CreatePointerCast(A, Sa->getType());
2741     B = IRB.CreatePointerCast(B, Sb->getType());
2742 
2743     // Let [a0, a1] be the interval of possible values of A, taking into account
2744     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
2745     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
2746     bool IsSigned = I.isSigned();
2747 
2748     auto GetMinMaxUnsigned = [&](Value *V, Value *S) {
2749       if (IsSigned) {
2750         // Sign-flip to map from signed range to unsigned range. Relation A vs B
2751         // should be preserved, if checked with `getUnsignedPredicate()`.
2752         // The relationship between Amin, Amax, Bmin and Bmax is also unaffected,
2753         // as they are created by effectively adding/subtracting from A (or B) a
2754         // value derived from the shadow, with no overflow, either before or
2755         // after the sign flip.
2756         APInt MinVal =
2757             APInt::getSignedMinValue(V->getType()->getScalarSizeInBits());
2758         V = IRB.CreateXor(V, ConstantInt::get(V->getType(), MinVal));
2759       }
2760       // Minimize undefined bits.
2761       Value *Min = IRB.CreateAnd(V, IRB.CreateNot(S));
2762       Value *Max = IRB.CreateOr(V, S);
2763       return std::make_pair(Min, Max);
2764     };
2765 
2766     auto [Amin, Amax] = GetMinMaxUnsigned(A, Sa);
2767     auto [Bmin, Bmax] = GetMinMaxUnsigned(B, Sb);
2768     Value *S1 = IRB.CreateICmp(I.getUnsignedPredicate(), Amin, Bmax);
2769     Value *S2 = IRB.CreateICmp(I.getUnsignedPredicate(), Amax, Bmin);
2770 
2771     Value *Si = IRB.CreateXor(S1, S2);
2772     setShadow(&I, Si);
2773     setOriginForNaryOp(I);
2774   }
2775 
2776   /// Instrument signed relational comparisons.
2777   ///
2778   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
2779   /// bit of the shadow. Everything else is delegated to handleShadowOr().
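       /// For example, (x < 0) depends only on the sign bit of x, so its result is
       /// poisoned iff the sign bit of x's shadow is set.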
2780   void handleSignedRelationalComparison(ICmpInst &I) {
2781     Constant *constOp;
2782     Value *op = nullptr;
2783     CmpInst::Predicate pre;
2784     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
2785       op = I.getOperand(0);
2786       pre = I.getPredicate();
2787     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
2788       op = I.getOperand(1);
2789       pre = I.getSwappedPredicate();
2790     } else {
2791       handleShadowOr(I);
2792       return;
2793     }
2794 
2795     if ((constOp->isNullValue() &&
2796          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
2797         (constOp->isAllOnesValue() &&
2798          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
2799       IRBuilder<> IRB(&I);
2800       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
2801                                         "_msprop_icmp_s");
2802       setShadow(&I, Shadow);
2803       setOrigin(&I, getOrigin(op));
2804     } else {
2805       handleShadowOr(I);
2806     }
2807   }
2808 
2809   void visitICmpInst(ICmpInst &I) {
2810     if (!ClHandleICmp) {
2811       handleShadowOr(I);
2812       return;
2813     }
2814     if (I.isEquality()) {
2815       handleEqualityComparison(I);
2816       return;
2817     }
2818 
2819     assert(I.isRelational());
2820     if (ClHandleICmpExact) {
2821       handleRelationalComparisonExact(I);
2822       return;
2823     }
2824     if (I.isSigned()) {
2825       handleSignedRelationalComparison(I);
2826       return;
2827     }
2828 
2829     assert(I.isUnsigned());
2830     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
2831       handleRelationalComparisonExact(I);
2832       return;
2833     }
2834 
2835     handleShadowOr(I);
2836   }
2837 
2838   void visitFCmpInst(FCmpInst &I) { handleShadowOr(I); }
2839 
2840   void handleShift(BinaryOperator &I) {
2841     IRBuilder<> IRB(&I);
2842     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2843     // Otherwise perform the same shift on S1.
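         // E.g., for a constant shift (x << 3), S2 is clean, so the result shadow is
         // simply (Sx << 3); the bits shifted in are zero, i.e. initialized.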
2844     Value *S1 = getShadow(&I, 0);
2845     Value *S2 = getShadow(&I, 1);
2846     Value *S2Conv =
2847         IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)), S2->getType());
2848     Value *V2 = I.getOperand(1);
2849     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
2850     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2851     setOriginForNaryOp(I);
2852   }
2853 
2854   void visitShl(BinaryOperator &I) { handleShift(I); }
2855   void visitAShr(BinaryOperator &I) { handleShift(I); }
2856   void visitLShr(BinaryOperator &I) { handleShift(I); }
2857 
2858   void handleFunnelShift(IntrinsicInst &I) {
2859     IRBuilder<> IRB(&I);
2860     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2861     // Otherwise perform the same shift on S0 and S1.
2862     Value *S0 = getShadow(&I, 0);
2863     Value *S1 = getShadow(&I, 1);
2864     Value *S2 = getShadow(&I, 2);
2865     Value *S2Conv =
2866         IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)), S2->getType());
2867     Value *V2 = I.getOperand(2);
2868     Value *Shift = IRB.CreateIntrinsic(I.getIntrinsicID(), S2Conv->getType(),
2869                                        {S0, S1, V2});
2870     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2871     setOriginForNaryOp(I);
2872   }
2873 
2874   /// Instrument llvm.memmove
2875   ///
2876   /// At this point we don't know if llvm.memmove will be inlined or not.
2877   /// If we don't instrument it and it gets inlined,
2878   /// our interceptor will not kick in and we will lose the memmove.
2879   /// If we instrument the call here, but it does not get inlined,
2880   /// we will memmove the shadow twice, which is bad in case
2881   /// of overlapping regions. So, we simply lower the intrinsic to a call.
2882   ///
2883   /// Similar situation exists for memcpy and memset.
2884   void visitMemMoveInst(MemMoveInst &I) {
2885     getShadow(I.getArgOperand(1)); // Ensure shadow initialized
2886     IRBuilder<> IRB(&I);
2887     IRB.CreateCall(MS.MemmoveFn,
2888                    {I.getArgOperand(0), I.getArgOperand(1),
2889                     IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2890     I.eraseFromParent();
2891   }
2892 
2893   /// Instrument memcpy
2894   ///
2895   /// Similar to memmove: avoid copying shadow twice. This is somewhat
2896   /// unfortunate as it may slow down small constant memcpys.
2897   /// FIXME: consider doing manual inline for small constant sizes and proper
2898   /// alignment.
2899   ///
2900   /// Note: This also handles memcpy.inline, which promises no calls to external
2901   /// functions as an optimization. However, with instrumentation enabled this
2902   /// is difficult to promise; additionally, we know that the MSan runtime
2903   /// exists and provides __msan_memcpy(). Therefore, we assume that with
2904   /// instrumentation it's safe to turn memcpy.inline into a call to
2905   /// __msan_memcpy(). Should this be wrong, such as when implementing memcpy()
2906   /// itself, instrumentation should be disabled with the no_sanitize attribute.
2907   void visitMemCpyInst(MemCpyInst &I) {
2908     getShadow(I.getArgOperand(1)); // Ensure shadow initialized
2909     IRBuilder<> IRB(&I);
2910     IRB.CreateCall(MS.MemcpyFn,
2911                    {I.getArgOperand(0), I.getArgOperand(1),
2912                     IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2913     I.eraseFromParent();
2914   }
2915 
2916   // Same as memcpy.
2917   void visitMemSetInst(MemSetInst &I) {
2918     IRBuilder<> IRB(&I);
2919     IRB.CreateCall(
2920         MS.MemsetFn,
2921         {I.getArgOperand(0),
2922          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2923          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2924     I.eraseFromParent();
2925   }
2926 
2927   void visitVAStartInst(VAStartInst &I) { VAHelper->visitVAStartInst(I); }
2928 
2929   void visitVACopyInst(VACopyInst &I) { VAHelper->visitVACopyInst(I); }
2930 
2931   /// Handle vector store-like intrinsics.
2932   ///
2933   /// Instrument intrinsics that look like a simple SIMD store: writes memory,
2934   /// has 1 pointer argument and 1 vector argument, returns void.
2935   bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
2936     IRBuilder<> IRB(&I);
2937     Value *Addr = I.getArgOperand(0);
2938     Value *Shadow = getShadow(&I, 1);
2939     Value *ShadowPtr, *OriginPtr;
2940 
2941     // We don't know the pointer alignment (could be unaligned SSE store!).
2942     // Have to assume the worst case.
2943     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
2944         Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true);
2945     IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1));
2946 
2947     if (ClCheckAccessAddress)
2948       insertShadowCheck(Addr, &I);
2949 
2950     // FIXME: factor out common code from materializeStores
2951     if (MS.TrackOrigins)
2952       IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
2953     return true;
2954   }
2955 
2956   /// Handle vector load-like intrinsics.
2957   ///
2958   /// Instrument intrinsics that look like a simple SIMD load: reads memory,
2959   /// has 1 pointer argument, returns a vector.
2960   bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
2961     IRBuilder<> IRB(&I);
2962     Value *Addr = I.getArgOperand(0);
2963 
2964     Type *ShadowTy = getShadowTy(&I);
2965     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2966     if (PropagateShadow) {
2967       // We don't know the pointer alignment (could be unaligned SSE load!).
2968       // Have to assume the worst case.
2969       const Align Alignment = Align(1);
2970       std::tie(ShadowPtr, OriginPtr) =
2971           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2972       setShadow(&I,
2973                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2974     } else {
2975       setShadow(&I, getCleanShadow(&I));
2976     }
2977 
2978     if (ClCheckAccessAddress)
2979       insertShadowCheck(Addr, &I);
2980 
2981     if (MS.TrackOrigins) {
2982       if (PropagateShadow)
2983         setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
2984       else
2985         setOrigin(&I, getCleanOrigin());
2986     }
2987     return true;
2988   }
2989 
2990   /// Handle (SIMD arithmetic)-like intrinsics.
2991   ///
2992   /// Instrument intrinsics with any number of arguments of the same type [*],
2993   /// equal to the return type, plus a specified number of trailing flags of
2994   /// any type.
2995   ///
2996   /// [*] The type should be simple (no aggregates or pointers; vectors are
2997   /// fine).
2998   ///
2999   /// Caller guarantees that this intrinsic does not access memory.
3000   [[maybe_unused]] bool
3001   maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I,
3002                                   unsigned int trailingFlags) {
3003     Type *RetTy = I.getType();
3004     if (!(RetTy->isIntOrIntVectorTy() || RetTy->isFPOrFPVectorTy()))
3005       return false;
3006 
3007     unsigned NumArgOperands = I.arg_size();
3008     assert(NumArgOperands >= trailingFlags);
3009     for (unsigned i = 0; i < NumArgOperands - trailingFlags; ++i) {
3010       Type *Ty = I.getArgOperand(i)->getType();
3011       if (Ty != RetTy)
3012         return false;
3013     }
3014 
3015     IRBuilder<> IRB(&I);
3016     ShadowAndOriginCombiner SC(this, IRB);
3017     for (unsigned i = 0; i < NumArgOperands; ++i)
3018       SC.Add(I.getArgOperand(i));
3019     SC.Done(&I);
3020 
3021     return true;
3022   }
3023 
3024   /// Heuristically instrument unknown intrinsics.
3025   ///
3026   /// The main purpose of this code is to do something reasonable with all
3027   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
3028   /// We recognize several classes of intrinsics by their argument types and
3029   /// ModRefBehaviour and apply special instrumentation when we are reasonably
3030   /// sure that we know what the intrinsic does.
3031   ///
3032   /// We special-case intrinsics where this approach fails. See llvm.bswap
3033   /// handling as an example of that.
3034   bool handleUnknownIntrinsicUnlogged(IntrinsicInst &I) {
3035     unsigned NumArgOperands = I.arg_size();
3036     if (NumArgOperands == 0)
3037       return false;
3038 
3039     if (NumArgOperands == 2 && I.getArgOperand(0)->getType()->isPointerTy() &&
3040         I.getArgOperand(1)->getType()->isVectorTy() &&
3041         I.getType()->isVoidTy() && !I.onlyReadsMemory()) {
3042       // This looks like a vector store.
3043       return handleVectorStoreIntrinsic(I);
3044     }
3045 
3046     if (NumArgOperands == 1 && I.getArgOperand(0)->getType()->isPointerTy() &&
3047         I.getType()->isVectorTy() && I.onlyReadsMemory()) {
3048       // This looks like a vector load.
3049       return handleVectorLoadIntrinsic(I);
3050     }
3051 
3052     if (I.doesNotAccessMemory())
3053       if (maybeHandleSimpleNomemIntrinsic(I, /*trailingFlags=*/0))
3054         return true;
3055 
3056     // FIXME: detect and handle SSE maskstore/maskload?
3057     // Some cases are now handled in handleAVXMasked{Load,Store}.
3058     return false;
3059   }
3060 
3061   bool handleUnknownIntrinsic(IntrinsicInst &I) {
3062     if (handleUnknownIntrinsicUnlogged(I)) {
3063       if (ClDumpStrictIntrinsics)
3064         dumpInst(I);
3065 
3066       LLVM_DEBUG(dbgs() << "UNKNOWN INTRINSIC HANDLED HEURISTICALLY: " << I
3067                         << "\n");
3068       return true;
3069     } else
3070       return false;
3071   }
3072 
3073   void handleInvariantGroup(IntrinsicInst &I) {
3074     setShadow(&I, getShadow(&I, 0));
3075     setOrigin(&I, getOrigin(&I, 0));
3076   }
3077 
3078   void handleLifetimeStart(IntrinsicInst &I) {
3079     if (!PoisonStack)
3080       return;
3081     AllocaInst *AI = llvm::findAllocaForValue(I.getArgOperand(1));
3082     if (!AI)
3083       InstrumentLifetimeStart = false;
3084     LifetimeStartList.push_back(std::make_pair(&I, AI));
3085   }
3086 
3087   void handleBswap(IntrinsicInst &I) {
3088     IRBuilder<> IRB(&I);
3089     Value *Op = I.getArgOperand(0);
3090     Type *OpType = Op->getType();
3091     setShadow(&I, IRB.CreateIntrinsic(Intrinsic::bswap, ArrayRef(&OpType, 1),
3092                                       getShadow(Op)));
3093     setOrigin(&I, getOrigin(Op));
3094   }
3095 
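       // Instrument ctlz/cttz. The count is fully poisoned if any bit of the source
       // is poisoned; if the is_zero_poison flag is set, a zero source also poisons
       // the result.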
3096   void handleCountZeroes(IntrinsicInst &I) {
3097     IRBuilder<> IRB(&I);
3098     Value *Src = I.getArgOperand(0);
3099 
3100     // Set the output shadow based on the input shadow.
3101     Value *BoolShadow = IRB.CreateIsNotNull(getShadow(Src), "_mscz_bs");
3102 
3103     // If zero poison is requested, mix in with the shadow
3104     Constant *IsZeroPoison = cast<Constant>(I.getOperand(1));
3105     if (!IsZeroPoison->isZeroValue()) {
3106       Value *BoolZeroPoison = IRB.CreateIsNull(Src, "_mscz_bzp");
3107       BoolShadow = IRB.CreateOr(BoolShadow, BoolZeroPoison, "_mscz_bs");
3108     }
3109 
3110     Value *OutputShadow =
3111         IRB.CreateSExt(BoolShadow, getShadowTy(Src), "_mscz_os");
3112 
3113     setShadow(&I, OutputShadow);
3114     setOriginForNaryOp(I);
3115   }
3116 
3117   // Instrument vector convert intrinsic.
3118   //
3119   // This function instruments intrinsics like cvtsi2ss:
3120   // %Out = int_xxx_cvtyyy(%ConvertOp)
3121   // or
3122   // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
3123   // The intrinsic converts \p NumUsedElements elements of \p ConvertOp to the
3124   // same number of \p Out elements, and (if it has 2 arguments) copies the rest
3125   // of the elements from \p CopyOp.
3126   // In most cases the conversion involves a floating-point value which may trigger a
3127   // hardware exception when not fully initialized. For this reason we require
3128   // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
3129   // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
3130   // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
3131   // return a fully initialized value.
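       // For example, for a cvtsd2si-style intrinsic with a single vector argument,
       // only element 0 of ConvertOp is checked, and the scalar result is treated as
       // fully initialized.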
3132   void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements,
3133                                     bool HasRoundingMode = false) {
3134     IRBuilder<> IRB(&I);
3135     Value *CopyOp, *ConvertOp;
3136 
3137     assert((!HasRoundingMode ||
3138             isa<ConstantInt>(I.getArgOperand(I.arg_size() - 1))) &&
3139            "Invalid rounding mode");
3140 
3141     switch (I.arg_size() - HasRoundingMode) {
3142     case 2:
3143       CopyOp = I.getArgOperand(0);
3144       ConvertOp = I.getArgOperand(1);
3145       break;
3146     case 1:
3147       ConvertOp = I.getArgOperand(0);
3148       CopyOp = nullptr;
3149       break;
3150     default:
3151       llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
3152     }
3153 
3154     // The first *NumUsedElements* elements of ConvertOp are converted to the
3155     // same number of output elements. The rest of the output is copied from
3156     // CopyOp, or (if not available) filled with zeroes.
3157     // Combine shadow for elements of ConvertOp that are used in this operation,
3158     // and insert a check.
3159     // FIXME: consider propagating shadow of ConvertOp, at least in the case of
3160     // int->any conversion.
3161     Value *ConvertShadow = getShadow(ConvertOp);
3162     Value *AggShadow = nullptr;
3163     if (ConvertOp->getType()->isVectorTy()) {
3164       AggShadow = IRB.CreateExtractElement(
3165           ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
3166       for (int i = 1; i < NumUsedElements; ++i) {
3167         Value *MoreShadow = IRB.CreateExtractElement(
3168             ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
3169         AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
3170       }
3171     } else {
3172       AggShadow = ConvertShadow;
3173     }
3174     assert(AggShadow->getType()->isIntegerTy());
3175     insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
3176 
3177     // Build result shadow by zero-filling parts of CopyOp shadow that come from
3178     // ConvertOp.
3179     if (CopyOp) {
3180       assert(CopyOp->getType() == I.getType());
3181       assert(CopyOp->getType()->isVectorTy());
3182       Value *ResultShadow = getShadow(CopyOp);
3183       Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType();
3184       for (int i = 0; i < NumUsedElements; ++i) {
3185         ResultShadow = IRB.CreateInsertElement(
3186             ResultShadow, ConstantInt::getNullValue(EltTy),
3187             ConstantInt::get(IRB.getInt32Ty(), i));
3188       }
3189       setShadow(&I, ResultShadow);
3190       setOrigin(&I, getOrigin(CopyOp));
3191     } else {
3192       setShadow(&I, getCleanShadow(&I));
3193       setOrigin(&I, getCleanOrigin());
3194     }
3195   }
3196 
3197   // Given a scalar or vector, extract lower 64 bits (or less), and return all
3198   // zeroes if it is zero, and all ones otherwise.
3199   Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
3200     if (S->getType()->isVectorTy())
3201       S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
3202     assert(S->getType()->getPrimitiveSizeInBits() <= 64);
3203     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
3204     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
3205   }
3206 
3207   // Given a vector, extract its first element, and return all
3208   // zeroes if it is zero, and all ones otherwise.
3209   Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
3210     Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
3211     Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
3212     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
3213   }
3214 
3215   Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
3216     Type *T = S->getType();
3217     assert(T->isVectorTy());
3218     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
3219     return IRB.CreateSExt(S2, T);
3220   }
3221 
3222   // Instrument vector shift intrinsic.
3223   //
3224   // This function instruments intrinsics like int_x86_avx2_psll_w.
3225   // Intrinsic shifts %In by %ShiftSize bits.
3226   // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
3227   // size, and the rest is ignored. Behavior is defined even if shift size is
3228   // greater than register (or field) width.
3229   void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
3230     assert(I.arg_size() == 2);
3231     IRBuilder<> IRB(&I);
3232     // If any of the S2 bits are poisoned, the whole thing is poisoned.
3233     // Otherwise perform the same shift on S1.
3234     Value *S1 = getShadow(&I, 0);
3235     Value *S2 = getShadow(&I, 1);
3236     Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
3237                              : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
3238     Value *V1 = I.getOperand(0);
3239     Value *V2 = I.getOperand(1);
3240     Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
3241                                   {IRB.CreateBitCast(S1, V1->getType()), V2});
3242     Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
3243     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
3244     setOriginForNaryOp(I);
3245   }
3246 
3247   // Get an MMX-sized vector type.
3248   Type *getMMXVectorTy(unsigned EltSizeInBits) {
3249     const unsigned X86_MMXSizeInBits = 64;
3250     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
3251            "Illegal MMX vector element size");
3252     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
3253                                 X86_MMXSizeInBits / EltSizeInBits);
3254   }
3255 
3256   // Returns a signed counterpart for an (un)signed-saturate-and-pack
3257   // intrinsic.
3258   Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
3259     switch (id) {
3260     case Intrinsic::x86_sse2_packsswb_128:
3261     case Intrinsic::x86_sse2_packuswb_128:
3262       return Intrinsic::x86_sse2_packsswb_128;
3263 
3264     case Intrinsic::x86_sse2_packssdw_128:
3265     case Intrinsic::x86_sse41_packusdw:
3266       return Intrinsic::x86_sse2_packssdw_128;
3267 
3268     case Intrinsic::x86_avx2_packsswb:
3269     case Intrinsic::x86_avx2_packuswb:
3270       return Intrinsic::x86_avx2_packsswb;
3271 
3272     case Intrinsic::x86_avx2_packssdw:
3273     case Intrinsic::x86_avx2_packusdw:
3274       return Intrinsic::x86_avx2_packssdw;
3275 
3276     case Intrinsic::x86_mmx_packsswb:
3277     case Intrinsic::x86_mmx_packuswb:
3278       return Intrinsic::x86_mmx_packsswb;
3279 
3280     case Intrinsic::x86_mmx_packssdw:
3281       return Intrinsic::x86_mmx_packssdw;
3282     default:
3283       llvm_unreachable("unexpected intrinsic id");
3284     }
3285   }
3286 
3287   // Instrument vector pack intrinsic.
3288   //
3289   // This function instruments intrinsics like x86_mmx_packsswb, that
3290   // packs elements of 2 input vectors into half as many bits with saturation.
3291   // Shadow is propagated with the signed variant of the same intrinsic applied
3292   // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
3293   // MMXEltSizeInBits is used only for x86mmx arguments.
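       // E.g., for packsswb, a 16-bit input element whose shadow is non-zero becomes
       // 0xFFFF after the sext(!= 0) step and saturates to 0xFF, i.e. a fully
       // poisoned 8-bit output element; a clean input element stays 0.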
3294   void handleVectorPackIntrinsic(IntrinsicInst &I,
3295                                  unsigned MMXEltSizeInBits = 0) {
3296     assert(I.arg_size() == 2);
3297     IRBuilder<> IRB(&I);
3298     Value *S1 = getShadow(&I, 0);
3299     Value *S2 = getShadow(&I, 1);
3300     assert(S1->getType()->isVectorTy());
3301 
3302     // SExt and ICmpNE below must apply to individual elements of input vectors.
3303     // In case of x86mmx arguments, cast them to appropriate vector types and
3304     // back.
3305     Type *T =
3306         MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits) : S1->getType();
3307     if (MMXEltSizeInBits) {
3308       S1 = IRB.CreateBitCast(S1, T);
3309       S2 = IRB.CreateBitCast(S2, T);
3310     }
3311     Value *S1_ext =
3312         IRB.CreateSExt(IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
3313     Value *S2_ext =
3314         IRB.CreateSExt(IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
3315     if (MMXEltSizeInBits) {
3316       S1_ext = IRB.CreateBitCast(S1_ext, getMMXVectorTy(64));
3317       S2_ext = IRB.CreateBitCast(S2_ext, getMMXVectorTy(64));
3318     }
3319 
3320     Value *S = IRB.CreateIntrinsic(getSignedPackIntrinsic(I.getIntrinsicID()),
3321                                    {}, {S1_ext, S2_ext}, /*FMFSource=*/nullptr,
3322                                    "_msprop_vector_pack");
3323     if (MMXEltSizeInBits)
3324       S = IRB.CreateBitCast(S, getShadowTy(&I));
3325     setShadow(&I, S);
3326     setOriginForNaryOp(I);
3327   }
3328 
3329   // Convert `Mask` into `<n x i1>`.
3330   Constant *createDppMask(unsigned Width, unsigned Mask) {
3331     SmallVector<Constant *, 4> R(Width);
3332     for (auto &M : R) {
3333       M = ConstantInt::getBool(F.getContext(), Mask & 1);
3334       Mask >>= 1;
3335     }
3336     return ConstantVector::get(R);
3337   }
3338 
3339   // Calculate the output shadow as an array of booleans `<n x i1>`, assuming
3340   // that if any arg is poisoned, the entire dot product is poisoned.
3341   Value *findDppPoisonedOutput(IRBuilder<> &IRB, Value *S, unsigned SrcMask,
3342                                unsigned DstMask) {
3343     const unsigned Width =
3344         cast<FixedVectorType>(S->getType())->getNumElements();
3345 
3346     S = IRB.CreateSelect(createDppMask(Width, SrcMask), S,
3347                          Constant::getNullValue(S->getType()));
3348     Value *SElem = IRB.CreateOrReduce(S);
3349     Value *IsClean = IRB.CreateIsNull(SElem, "_msdpp");
3350     Value *DstMaskV = createDppMask(Width, DstMask);
3351 
3352     return IRB.CreateSelect(
3353         IsClean, Constant::getNullValue(DstMaskV->getType()), DstMaskV);
3354   }
3355 
3356   // See `Intel Intrinsics Guide` for `_dp_p*` instructions.
3357   //
3358   // The 2- and 4-element versions produce a single scalar dot product and put
3359   // it into the elements of the output vector selected by the 4 lowest bits of
3360   // the mask. The top 4 bits of the mask control which elements of the input
3361   // are used for the dot product.
3362   //
3363   // The 8-element version's mask still has only 4 bits for the input and 4 bits
3364   // for the output mask. According to the spec it operates as the 4-element
3365   // version on the first 4 elements of the inputs and output, and then on the
3366   // last 4 elements of the inputs and output.
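       // For example, a mask of 0x71 (SrcMask = 0x7, DstMask = 0x1) computes the dot
       // product of elements 0..2 and writes it to element 0 of the output; if any
       // of the used input elements is poisoned, the selected output element is
       // fully poisoned.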
3367   void handleDppIntrinsic(IntrinsicInst &I) {
3368     IRBuilder<> IRB(&I);
3369 
3370     Value *S0 = getShadow(&I, 0);
3371     Value *S1 = getShadow(&I, 1);
3372     Value *S = IRB.CreateOr(S0, S1);
3373 
3374     const unsigned Width =
3375         cast<FixedVectorType>(S->getType())->getNumElements();
3376     assert(Width == 2 || Width == 4 || Width == 8);
3377 
3378     const unsigned Mask = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3379     const unsigned SrcMask = Mask >> 4;
3380     const unsigned DstMask = Mask & 0xf;
3381 
3382     // Calculate shadow as `<n x i1>`.
3383     Value *SI1 = findDppPoisonedOutput(IRB, S, SrcMask, DstMask);
3384     if (Width == 8) {
3385       // First 4 elements of shadow are already calculated. `findDppPoisonedOutput`
3386       // operates on 32-bit masks, so we can just shift the masks and repeat.
3387       SI1 = IRB.CreateOr(
3388           SI1, findDppPoisonedOutput(IRB, S, SrcMask << 4, DstMask << 4));
3389     }
3390     // Extend to the real size of the shadow, poisoning either all or none of
3391     // the bits of an element.
3392     S = IRB.CreateSExt(SI1, S->getType(), "_msdpp");
3393 
3394     setShadow(&I, S);
3395     setOriginForNaryOp(I);
3396   }
3397 
3398   Value *convertBlendvToSelectMask(IRBuilder<> &IRB, Value *C) {
3399     C = CreateAppToShadowCast(IRB, C);
3400     FixedVectorType *FVT = cast<FixedVectorType>(C->getType());
3401     unsigned ElSize = FVT->getElementType()->getPrimitiveSizeInBits();
3402     C = IRB.CreateAShr(C, ElSize - 1);
3403     FVT = FixedVectorType::get(IRB.getInt1Ty(), FVT->getNumElements());
3404     return IRB.CreateTrunc(C, FVT);
3405   }
3406 
3407   // `blendv(f, t, c)` is effectively `select(c[top_bit], t, f)`.
3408   void handleBlendvIntrinsic(IntrinsicInst &I) {
3409     Value *C = I.getOperand(2);
3410     Value *T = I.getOperand(1);
3411     Value *F = I.getOperand(0);
3412 
3413     Value *Sc = getShadow(&I, 2);
3414     Value *Oc = MS.TrackOrigins ? getOrigin(C) : nullptr;
3415 
3416     {
3417       IRBuilder<> IRB(&I);
3418       // Extract top bit from condition and its shadow.
3419       C = convertBlendvToSelectMask(IRB, C);
3420       Sc = convertBlendvToSelectMask(IRB, Sc);
3421 
3422       setShadow(C, Sc);
3423       setOrigin(C, Oc);
3424     }
3425 
3426     handleSelectLikeInst(I, C, T, F);
3427   }
3428 
3429   // Instrument sum-of-absolute-differences intrinsic.
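       // E.g., psadbw sums absolute differences of byte pairs into the low 16 bits
       // of each 64-bit result group; if any input byte of a group is poisoned, the
       // 16 significant bits of that group's result are fully poisoned, while the
       // known-zero upper bits stay clean.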
3430   void handleVectorSadIntrinsic(IntrinsicInst &I, bool IsMMX = false) {
3431     const unsigned SignificantBitsPerResultElement = 16;
3432     Type *ResTy = IsMMX ? IntegerType::get(*MS.C, 64) : I.getType();
3433     unsigned ZeroBitsPerResultElement =
3434         ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
3435 
3436     IRBuilder<> IRB(&I);
3437     auto *Shadow0 = getShadow(&I, 0);
3438     auto *Shadow1 = getShadow(&I, 1);
3439     Value *S = IRB.CreateOr(Shadow0, Shadow1);
3440     S = IRB.CreateBitCast(S, ResTy);
3441     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
3442                        ResTy);
3443     S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
3444     S = IRB.CreateBitCast(S, getShadowTy(&I));
3445     setShadow(&I, S);
3446     setOriginForNaryOp(I);
3447   }
3448 
3449   // Instrument multiply-add intrinsic.
3450   void handleVectorPmaddIntrinsic(IntrinsicInst &I,
3451                                   unsigned MMXEltSizeInBits = 0) {
3452     Type *ResTy =
3453         MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits * 2) : I.getType();
3454     IRBuilder<> IRB(&I);
3455     auto *Shadow0 = getShadow(&I, 0);
3456     auto *Shadow1 = getShadow(&I, 1);
3457     Value *S = IRB.CreateOr(Shadow0, Shadow1);
3458     S = IRB.CreateBitCast(S, ResTy);
3459     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
3460                        ResTy);
3461     S = IRB.CreateBitCast(S, getShadowTy(&I));
3462     setShadow(&I, S);
3463     setOriginForNaryOp(I);
3464   }
3465 
3466   // Instrument compare-packed intrinsic.
3467   // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
3468   // all-ones shadow.
3469   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
3470     IRBuilder<> IRB(&I);
3471     Type *ResTy = getShadowTy(&I);
3472     auto *Shadow0 = getShadow(&I, 0);
3473     auto *Shadow1 = getShadow(&I, 1);
3474     Value *S0 = IRB.CreateOr(Shadow0, Shadow1);
3475     Value *S = IRB.CreateSExt(
3476         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
3477     setShadow(&I, S);
3478     setOriginForNaryOp(I);
3479   }
3480 
3481   // Instrument compare-scalar intrinsic.
3482   // This handles both cmp* intrinsics which return the result in the first
3483   // element of a vector, and comi* which return the result as i32.
3484   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
3485     IRBuilder<> IRB(&I);
3486     auto *Shadow0 = getShadow(&I, 0);
3487     auto *Shadow1 = getShadow(&I, 1);
3488     Value *S0 = IRB.CreateOr(Shadow0, Shadow1);
3489     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
3490     setShadow(&I, S);
3491     setOriginForNaryOp(I);
3492   }
3493 
3494   // Instrument generic vector reduction intrinsics
3495   // by ORing together all their fields.
3496   void handleVectorReduceIntrinsic(IntrinsicInst &I) {
3497     IRBuilder<> IRB(&I);
3498     Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
3499     setShadow(&I, S);
3500     setOrigin(&I, getOrigin(&I, 0));
3501   }
3502 
3503   // Instrument vector.reduce.or intrinsic.
3504   // Valid (non-poisoned) set bits in the operand pull low the
3505   // corresponding shadow bits.
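       // E.g., reducing <2 x i2> with elements 0b01 (clean) and 0b?? (poisoned):
       // bit 0 of the result is clean because some element definitely has a 1 there,
       // while bit 1 is poisoned.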
3506   void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
3507     IRBuilder<> IRB(&I);
3508     Value *OperandShadow = getShadow(&I, 0);
3509     Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0));
3510     Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow);
3511     // Bit N is clean if any field's bit N is 1 and unpoisoned
3512     Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison);
3513     // Otherwise, it is clean if every field's bit N is unpoisoned
3514     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3515     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3516 
3517     setShadow(&I, S);
3518     setOrigin(&I, getOrigin(&I, 0));
3519   }
3520 
3521   // Instrument vector.reduce.and intrinsic.
3522   // Valid (non-poisoned) unset bits in the operand pull down the
3523   // corresponding shadow bits.
3524   void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
3525     IRBuilder<> IRB(&I);
3526     Value *OperandShadow = getShadow(&I, 0);
3527     Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow);
3528     // Bit N is clean if any field's bit N is 0 and unpoisoned
3529     Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison);
3530     // Otherwise, it is clean if every field's bit N is unpoisoned
3531     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3532     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3533 
3534     setShadow(&I, S);
3535     setOrigin(&I, getOrigin(&I, 0));
3536   }
3537 
3538   void handleStmxcsr(IntrinsicInst &I) {
3539     IRBuilder<> IRB(&I);
3540     Value *Addr = I.getArgOperand(0);
3541     Type *Ty = IRB.getInt32Ty();
3542     Value *ShadowPtr =
3543         getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first;
3544 
3545     IRB.CreateStore(getCleanShadow(Ty), ShadowPtr);
3546 
3547     if (ClCheckAccessAddress)
3548       insertShadowCheck(Addr, &I);
3549   }
3550 
3551   void handleLdmxcsr(IntrinsicInst &I) {
3552     if (!InsertChecks)
3553       return;
3554 
3555     IRBuilder<> IRB(&I);
3556     Value *Addr = I.getArgOperand(0);
3557     Type *Ty = IRB.getInt32Ty();
3558     const Align Alignment = Align(1);
3559     Value *ShadowPtr, *OriginPtr;
3560     std::tie(ShadowPtr, OriginPtr) =
3561         getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
3562 
3563     if (ClCheckAccessAddress)
3564       insertShadowCheck(Addr, &I);
3565 
3566     Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
3567     Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
3568                                     : getCleanOrigin();
3569     insertShadowCheck(Shadow, Origin, &I);
3570   }
3571 
3572   void handleMaskedExpandLoad(IntrinsicInst &I) {
3573     IRBuilder<> IRB(&I);
3574     Value *Ptr = I.getArgOperand(0);
3575     MaybeAlign Align = I.getParamAlign(0);
3576     Value *Mask = I.getArgOperand(1);
3577     Value *PassThru = I.getArgOperand(2);
3578 
3579     if (ClCheckAccessAddress) {
3580       insertShadowCheck(Ptr, &I);
3581       insertShadowCheck(Mask, &I);
3582     }
3583 
3584     if (!PropagateShadow) {
3585       setShadow(&I, getCleanShadow(&I));
3586       setOrigin(&I, getCleanOrigin());
3587       return;
3588     }
3589 
3590     Type *ShadowTy = getShadowTy(&I);
3591     Type *ElementShadowTy = cast<VectorType>(ShadowTy)->getElementType();
3592     auto [ShadowPtr, OriginPtr] =
3593         getShadowOriginPtr(Ptr, IRB, ElementShadowTy, Align, /*isStore*/ false);
3594 
3595     Value *Shadow =
3596         IRB.CreateMaskedExpandLoad(ShadowTy, ShadowPtr, Align, Mask,
3597                                    getShadow(PassThru), "_msmaskedexpload");
3598 
3599     setShadow(&I, Shadow);
3600 
3601     // TODO: Store origins.
3602     setOrigin(&I, getCleanOrigin());
3603   }
3604 
3605   void handleMaskedCompressStore(IntrinsicInst &I) {
3606     IRBuilder<> IRB(&I);
3607     Value *Values = I.getArgOperand(0);
3608     Value *Ptr = I.getArgOperand(1);
3609     MaybeAlign Align = I.getParamAlign(1);
3610     Value *Mask = I.getArgOperand(2);
3611 
3612     if (ClCheckAccessAddress) {
3613       insertShadowCheck(Ptr, &I);
3614       insertShadowCheck(Mask, &I);
3615     }
3616 
3617     Value *Shadow = getShadow(Values);
3618     Type *ElementShadowTy =
3619         getShadowTy(cast<VectorType>(Values->getType())->getElementType());
3620     auto [ShadowPtr, OriginPtrs] =
3621         getShadowOriginPtr(Ptr, IRB, ElementShadowTy, Align, /*isStore*/ true);
3622 
3623     IRB.CreateMaskedCompressStore(Shadow, ShadowPtr, Align, Mask);
3624 
3625     // TODO: Store origins.
3626   }
3627 
3628   void handleMaskedGather(IntrinsicInst &I) {
3629     IRBuilder<> IRB(&I);
3630     Value *Ptrs = I.getArgOperand(0);
3631     const Align Alignment(
3632         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3633     Value *Mask = I.getArgOperand(2);
3634     Value *PassThru = I.getArgOperand(3);
3635 
3636     Type *PtrsShadowTy = getShadowTy(Ptrs);
3637     if (ClCheckAccessAddress) {
3638       insertShadowCheck(Mask, &I);
3639       Value *MaskedPtrShadow = IRB.CreateSelect(
3640           Mask, getShadow(Ptrs), Constant::getNullValue((PtrsShadowTy)),
3641           "_msmaskedptrs");
3642       insertShadowCheck(MaskedPtrShadow, getOrigin(Ptrs), &I);
3643     }
3644 
3645     if (!PropagateShadow) {
3646       setShadow(&I, getCleanShadow(&I));
3647       setOrigin(&I, getCleanOrigin());
3648       return;
3649     }
3650 
3651     Type *ShadowTy = getShadowTy(&I);
3652     Type *ElementShadowTy = cast<VectorType>(ShadowTy)->getElementType();
3653     auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
3654         Ptrs, IRB, ElementShadowTy, Alignment, /*isStore*/ false);
3655 
3656     Value *Shadow =
3657         IRB.CreateMaskedGather(ShadowTy, ShadowPtrs, Alignment, Mask,
3658                                getShadow(PassThru), "_msmaskedgather");
3659 
3660     setShadow(&I, Shadow);
3661 
3662     // TODO: Store origins.
3663     setOrigin(&I, getCleanOrigin());
3664   }
3665 
3666   void handleMaskedScatter(IntrinsicInst &I) {
3667     IRBuilder<> IRB(&I);
3668     Value *Values = I.getArgOperand(0);
3669     Value *Ptrs = I.getArgOperand(1);
3670     const Align Alignment(
3671         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
3672     Value *Mask = I.getArgOperand(3);
3673 
3674     Type *PtrsShadowTy = getShadowTy(Ptrs);
3675     if (ClCheckAccessAddress) {
3676       insertShadowCheck(Mask, &I);
3677       Value *MaskedPtrShadow = IRB.CreateSelect(
3678           Mask, getShadow(Ptrs), Constant::getNullValue((PtrsShadowTy)),
3679           "_msmaskedptrs");
3680       insertShadowCheck(MaskedPtrShadow, getOrigin(Ptrs), &I);
3681     }
3682 
3683     Value *Shadow = getShadow(Values);
3684     Type *ElementShadowTy =
3685         getShadowTy(cast<VectorType>(Values->getType())->getElementType());
3686     auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
3687         Ptrs, IRB, ElementShadowTy, Alignment, /*isStore*/ true);
3688 
3689     IRB.CreateMaskedScatter(Shadow, ShadowPtrs, Alignment, Mask);
3690 
3691     // TODO: Store origin.
3692   }
3693 
3694   // Intrinsic::masked_store
3695   //
3696   // Note: handleAVXMaskedStore handles AVX/AVX2 variants, though AVX512 masked
3697   //       stores are lowered to Intrinsic::masked_store.
3698   void handleMaskedStore(IntrinsicInst &I) {
3699     IRBuilder<> IRB(&I);
3700     Value *V = I.getArgOperand(0);
3701     Value *Ptr = I.getArgOperand(1);
3702     const Align Alignment(
3703         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
3704     Value *Mask = I.getArgOperand(3);
3705     Value *Shadow = getShadow(V);
3706 
3707     if (ClCheckAccessAddress) {
3708       insertShadowCheck(Ptr, &I);
3709       insertShadowCheck(Mask, &I);
3710     }
3711 
3712     Value *ShadowPtr;
3713     Value *OriginPtr;
3714     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
3715         Ptr, IRB, Shadow->getType(), Alignment, /*isStore*/ true);
3716 
3717     IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask);
3718 
3719     if (!MS.TrackOrigins)
3720       return;
3721 
3722     auto &DL = F.getDataLayout();
3723     paintOrigin(IRB, getOrigin(V), OriginPtr,
3724                 DL.getTypeStoreSize(Shadow->getType()),
3725                 std::max(Alignment, kMinOriginAlignment));
3726   }
3727 
3728   // Intrinsic::masked_load
3729   //
3730   // Note: handleAVXMaskedLoad handles AVX/AVX2 variants, though AVX512 masked
3731   //       loads are lowered to Intrinsic::masked_load.
3732   void handleMaskedLoad(IntrinsicInst &I) {
3733     IRBuilder<> IRB(&I);
3734     Value *Ptr = I.getArgOperand(0);
3735     const Align Alignment(
3736         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3737     Value *Mask = I.getArgOperand(2);
3738     Value *PassThru = I.getArgOperand(3);
3739 
3740     if (ClCheckAccessAddress) {
3741       insertShadowCheck(Ptr, &I);
3742       insertShadowCheck(Mask, &I);
3743     }
3744 
3745     if (!PropagateShadow) {
3746       setShadow(&I, getCleanShadow(&I));
3747       setOrigin(&I, getCleanOrigin());
3748       return;
3749     }
3750 
3751     Type *ShadowTy = getShadowTy(&I);
3752     Value *ShadowPtr, *OriginPtr;
3753     std::tie(ShadowPtr, OriginPtr) =
3754         getShadowOriginPtr(Ptr, IRB, ShadowTy, Alignment, /*isStore*/ false);
3755     setShadow(&I, IRB.CreateMaskedLoad(ShadowTy, ShadowPtr, Alignment, Mask,
3756                                        getShadow(PassThru), "_msmaskedld"));
3757 
3758     if (!MS.TrackOrigins)
3759       return;
3760 
3761     // Choose between PassThru's and the loaded value's origins.
3762     Value *MaskedPassThruShadow = IRB.CreateAnd(
3763         getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));
3764 
3765     Value *NotNull = convertToBool(MaskedPassThruShadow, IRB, "_mscmp");
3766 
3767     Value *PtrOrigin = IRB.CreateLoad(MS.OriginTy, OriginPtr);
3768     Value *Origin = IRB.CreateSelect(NotNull, getOrigin(PassThru), PtrOrigin);
3769 
3770     setOrigin(&I, Origin);
3771   }
3772 
3773   // e.g., void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)
3774   //                                           dst  mask       src
3775   //
3776   // AVX512 masked stores are lowered to Intrinsic::masked_store and are handled
3777   // by handleMaskedStore.
3778   //
3779   // This function handles AVX and AVX2 masked stores; these use the MSBs of a
3780   // vector of integers, unlike the LLVM masked intrinsics, which require a
3781   // vector of booleans. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad
3782   // mentions that the x86 backend does not know how to efficiently convert
3783   // from a vector of booleans back into the AVX mask format; therefore, they
3784   // (and we) do not reduce AVX/AVX2 masked intrinsics into LLVM masked
3785   // intrinsics.
3786   void handleAVXMaskedStore(IntrinsicInst &I) {
3787     IRBuilder<> IRB(&I);
3788 
3789     Value *Dst = I.getArgOperand(0);
3790     assert(Dst->getType()->isPointerTy() && "Destination is not a pointer!");
3791 
3792     Value *Mask = I.getArgOperand(1);
3793     assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
3794 
3795     Value *Src = I.getArgOperand(2);
3796     assert(isa<VectorType>(Src->getType()) && "Source is not a vector!");
3797 
3798     const Align Alignment = Align(1);
3799 
3800     Value *SrcShadow = getShadow(Src);
3801 
3802     if (ClCheckAccessAddress) {
3803       insertShadowCheck(Dst, &I);
3804       insertShadowCheck(Mask, &I);
3805     }
3806 
3807     Value *DstShadowPtr;
3808     Value *DstOriginPtr;
3809     std::tie(DstShadowPtr, DstOriginPtr) = getShadowOriginPtr(
3810         Dst, IRB, SrcShadow->getType(), Alignment, /*isStore*/ true);
3811 
3812     SmallVector<Value *, 2> ShadowArgs;
3813     ShadowArgs.append(1, DstShadowPtr);
3814     ShadowArgs.append(1, Mask);
3815     // The intrinsic may require floating-point but shadows can be arbitrary
3816     // bit patterns, of which some would be interpreted as "invalid"
3817     // floating-point values (NaN etc.); we assume the intrinsic will happily
3818     // copy them.
3819     ShadowArgs.append(1, IRB.CreateBitCast(SrcShadow, Src->getType()));
3820 
3821     CallInst *CI =
3822         IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
3823     setShadow(&I, CI);
3824 
3825     if (!MS.TrackOrigins)
3826       return;
3827 
3828     // Approximation only
3829     auto &DL = F.getDataLayout();
3830     paintOrigin(IRB, getOrigin(Src), DstOriginPtr,
3831                 DL.getTypeStoreSize(SrcShadow->getType()),
3832                 std::max(Alignment, kMinOriginAlignment));
3833   }
3834 
3835   // e.g., <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>)
3836   //       return                                    src  mask
3837   //
3838   // Masked-off values are replaced with 0, which conveniently also represents
3839   // initialized memory.
3840   //
3841   // AVX512 masked loads are lowered to Intrinsic::masked_load and are handled
3842   // by handleMaskedLoad.
3843   //
3844   // We do not combine this with handleMaskedLoad; see comment in
3845   // handleAVXMaskedStore for the rationale.
3846   //
3847   // This is subtly different than handleIntrinsicByApplyingToShadow(I, 1)
3848   // because we need to apply getShadowOriginPtr, not getShadow, to the first
3849   // parameter.
3850   void handleAVXMaskedLoad(IntrinsicInst &I) {
3851     IRBuilder<> IRB(&I);
3852 
3853     Value *Src = I.getArgOperand(0);
3854     assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
3855 
3856     Value *Mask = I.getArgOperand(1);
3857     assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
3858 
3859     const Align Alignment = Align(1);
3860 
3861     if (ClCheckAccessAddress) {
3862       insertShadowCheck(Mask, &I);
3863     }
3864 
3865     Type *SrcShadowTy = getShadowTy(Src);
3866     Value *SrcShadowPtr, *SrcOriginPtr;
3867     std::tie(SrcShadowPtr, SrcOriginPtr) =
3868         getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
3869 
3870     SmallVector<Value *, 2> ShadowArgs;
3871     ShadowArgs.append(1, SrcShadowPtr);
3872     ShadowArgs.append(1, Mask);
3873 
3874     CallInst *CI =
3875         IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
3876     // The intrinsic may require floating-point but shadows can be arbitrary
3877     // bit patterns, of which some would be interpreted as "invalid"
3878     // floating-point values (NaN etc.); we assume the intrinsic will happily
3879     // copy them.
3880     setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
3881 
3882     if (!MS.TrackOrigins)
3883       return;
3884 
3885     // The "pass-through" value is always zero (initialized). To the extent
3886     // that this results in initialized aligned 4-byte chunks, the origin value
3887     // is ignored. It is therefore correct to simply copy the origin from src.
3888     Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
3889     setOrigin(&I, PtrSrcOrigin);
3890   }
3891 
3892   // Instrument BMI / BMI2 intrinsics.
3893   // All of these intrinsics are Z = I(X, Y)
3894   // where the types of all operands and the result match, and are either i32 or
3895   // i64. The following instrumentation happens to work for all of them:
3896   //   Sz = I(Sx, Y) | (sext (Sy != 0))
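       // E.g., for pext(x, mask) with a fully initialized mask, pext(Sx, mask)
       // gathers exactly the shadow bits of the source bits that are extracted, so
       // the result is poisoned only in the positions that actually depend on
       // poisoned input bits.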
3897   void handleBmiIntrinsic(IntrinsicInst &I) {
3898     IRBuilder<> IRB(&I);
3899     Type *ShadowTy = getShadowTy(&I);
3900 
3901     // If any bit of the mask operand is poisoned, then the whole thing is.
3902     Value *SMask = getShadow(&I, 1);
3903     SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
3904                            ShadowTy);
3905     // Apply the same intrinsic to the shadow of the first operand.
3906     Value *S = IRB.CreateCall(I.getCalledFunction(),
3907                               {getShadow(&I, 0), I.getOperand(1)});
3908     S = IRB.CreateOr(SMask, S);
3909     setShadow(&I, S);
3910     setOriginForNaryOp(I);
3911   }
3912 
3913   static SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
3914     SmallVector<int, 8> Mask;
3915     for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
3916       Mask.append(2, X);
3917     }
3918     return Mask;
3919   }
3920 
3921   // Instrument pclmul intrinsics.
3922   // These intrinsics operate either on odd or on even elements of the input
3923   // vectors, depending on the constant in the 3rd argument, ignoring the rest.
3924   // Replace the unused elements with copies of the used ones, ex:
3925   //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
3926   // or
3927   //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
3928   // and then apply the usual shadow combining logic.
3929   void handlePclmulIntrinsic(IntrinsicInst &I) {
3930     IRBuilder<> IRB(&I);
3931     unsigned Width =
3932         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3933     assert(isa<ConstantInt>(I.getArgOperand(2)) &&
3934            "pclmul 3rd operand must be a constant");
3935     unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3936     Value *Shuf0 = IRB.CreateShuffleVector(getShadow(&I, 0),
3937                                            getPclmulMask(Width, Imm & 0x01));
3938     Value *Shuf1 = IRB.CreateShuffleVector(getShadow(&I, 1),
3939                                            getPclmulMask(Width, Imm & 0x10));
3940     ShadowAndOriginCombiner SOC(this, IRB);
3941     SOC.Add(Shuf0, getOrigin(&I, 0));
3942     SOC.Add(Shuf1, getOrigin(&I, 1));
3943     SOC.Done(&I);
3944   }
3945 
3946   // Instrument _mm_*_sd|ss intrinsics
3947   void handleUnarySdSsIntrinsic(IntrinsicInst &I) {
3948     IRBuilder<> IRB(&I);
3949     unsigned Width =
3950         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3951     Value *First = getShadow(&I, 0);
3952     Value *Second = getShadow(&I, 1);
3953     // First element of second operand, remaining elements of first operand
3954     SmallVector<int, 16> Mask;
3955     Mask.push_back(Width);
3956     for (unsigned i = 1; i < Width; i++)
3957       Mask.push_back(i);
3958     Value *Shadow = IRB.CreateShuffleVector(First, Second, Mask);
3959 
3960     setShadow(&I, Shadow);
3961     setOriginForNaryOp(I);
3962   }
3963 
3964   void handleVtestIntrinsic(IntrinsicInst &I) {
3965     IRBuilder<> IRB(&I);
3966     Value *Shadow0 = getShadow(&I, 0);
3967     Value *Shadow1 = getShadow(&I, 1);
3968     Value *Or = IRB.CreateOr(Shadow0, Shadow1);
3969     Value *NZ = IRB.CreateICmpNE(Or, Constant::getNullValue(Or->getType()));
3970     Value *Scalar = convertShadowToScalar(NZ, IRB);
3971     Value *Shadow = IRB.CreateZExt(Scalar, getShadowTy(&I));
3972 
3973     setShadow(&I, Shadow);
3974     setOriginForNaryOp(I);
3975   }
3976 
3977   void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
3978     IRBuilder<> IRB(&I);
3979     unsigned Width =
3980         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3981     Value *First = getShadow(&I, 0);
3982     Value *Second = getShadow(&I, 1);
3983     Value *OrShadow = IRB.CreateOr(First, Second);
3984     // First element of both OR'd together, remaining elements of first operand
3985     SmallVector<int, 16> Mask;
3986     Mask.push_back(Width);
3987     for (unsigned i = 1; i < Width; i++)
3988       Mask.push_back(i);
3989     Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, Mask);
3990 
3991     setShadow(&I, Shadow);
3992     setOriginForNaryOp(I);
3993   }
3994 
3995   // _mm_round_pd / _mm_round_ps.
3996   // Similar to maybeHandleSimpleNomemIntrinsic, except that
3997   // the second argument is guaranteed to be a constant integer.
3998   void handleRoundPdPsIntrinsic(IntrinsicInst &I) {
3999     assert(I.getArgOperand(0)->getType() == I.getType());
4000     assert(I.arg_size() == 2);
4001     assert(isa<ConstantInt>(I.getArgOperand(1)));
4002 
4003     IRBuilder<> IRB(&I);
4004     ShadowAndOriginCombiner SC(this, IRB);
4005     SC.Add(I.getArgOperand(0));
4006     SC.Done(&I);
4007   }
4008 
4009   // Instrument abs intrinsic.
4010   // handleUnknownIntrinsic can't handle it because its last argument
4011   // (is_int_min_poison) does not match the result type.
4012   void handleAbsIntrinsic(IntrinsicInst &I) {
4013     assert(I.getType()->isIntOrIntVectorTy());
4014     assert(I.getArgOperand(0)->getType() == I.getType());
4015 
4016     // FIXME: Handle is_int_min_poison.
4017     IRBuilder<> IRB(&I);
4018     setShadow(&I, getShadow(&I, 0));
4019     setOrigin(&I, getOrigin(&I, 0));
4020   }
4021 
4022   void handleIsFpClass(IntrinsicInst &I) {
4023     IRBuilder<> IRB(&I);
4024     Value *Shadow = getShadow(&I, 0);
4025     setShadow(&I, IRB.CreateICmpNE(Shadow, getCleanShadow(Shadow)));
4026     setOrigin(&I, getOrigin(&I, 0));
4027   }
4028 
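  // Instrument the llvm.*.with.overflow intrinsics. For example (a sketch):
  //   %r = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  // gets the result shadow { Sa | Sb, (Sa | Sb) != 0 }: the value element
  // inherits every poisoned bit of either operand, and the overflow flag is
  // poisoned whenever any input bit is poisoned.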
4029   void handleArithmeticWithOverflow(IntrinsicInst &I) {
4030     IRBuilder<> IRB(&I);
4031     Value *Shadow0 = getShadow(&I, 0);
4032     Value *Shadow1 = getShadow(&I, 1);
4033     Value *ShadowElt0 = IRB.CreateOr(Shadow0, Shadow1);
4034     Value *ShadowElt1 =
4035         IRB.CreateICmpNE(ShadowElt0, getCleanShadow(ShadowElt0));
4036 
4037     Value *Shadow = PoisonValue::get(getShadowTy(&I));
4038     Shadow = IRB.CreateInsertValue(Shadow, ShadowElt0, 0);
4039     Shadow = IRB.CreateInsertValue(Shadow, ShadowElt1, 1);
4040 
4041     setShadow(&I, Shadow);
4042     setOriginForNaryOp(I);
4043   }
4044 
4045   void handleAVXHorizontalAddSubIntrinsic(IntrinsicInst &I) {
4046     // Approximation only:
4047     //    output         = horizontal_add/sub(A, B)
4048     // => shadow[output] = horizontal_add(shadow[A], shadow[B])
4049     //
4050     // We always use horizontal add instead of subtract, because subtracting
4051     // a fully uninitialized shadow would result in a fully initialized shadow.
4052     //
4053     // - If we add two adjacent zero (initialized) shadow values, the
4054     //   result will always be zero, i.e., no false positives.
4055     // - If we add two shadows, one of which is uninitialized, the
4056     //   result will always be non-zero, i.e., no false negatives.
4057     // - However, we can have false negatives if we do an addition that wraps
4058     //   to zero; we consider this an acceptable tradeoff for performance.
4059     //
4060     // To make shadow propagation precise, we want the equivalent of
4061     // "horizontal OR", but this is not available for SSE3/SSSE3/AVX/AVX2.
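    //
    // For instance (a sketch): for llvm.x86.ssse3.phsub.w.128(A, B) we emit
    //   shadow[out] = llvm.x86.ssse3.phadd.w.128(shadow[A], shadow[B])
    // via the hsub -> hadd substitution in the switch below.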
4062 
4063     Intrinsic::ID shadowIntrinsicID = I.getIntrinsicID();
4064 
4065     switch (I.getIntrinsicID()) {
4066     case Intrinsic::x86_sse3_hsub_ps:
4067       shadowIntrinsicID = Intrinsic::x86_sse3_hadd_ps;
4068       break;
4069 
4070     case Intrinsic::x86_sse3_hsub_pd:
4071       shadowIntrinsicID = Intrinsic::x86_sse3_hadd_pd;
4072       break;
4073 
4074     case Intrinsic::x86_ssse3_phsub_d:
4075       shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d;
4076       break;
4077 
4078     case Intrinsic::x86_ssse3_phsub_d_128:
4079       shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d_128;
4080       break;
4081 
4082     case Intrinsic::x86_ssse3_phsub_w:
4083       shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w;
4084       break;
4085 
4086     case Intrinsic::x86_ssse3_phsub_w_128:
4087       shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w_128;
4088       break;
4089 
4090     case Intrinsic::x86_ssse3_phsub_sw:
4091       shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw;
4092       break;
4093 
4094     case Intrinsic::x86_ssse3_phsub_sw_128:
4095       shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw_128;
4096       break;
4097 
4098     case Intrinsic::x86_avx_hsub_pd_256:
4099       shadowIntrinsicID = Intrinsic::x86_avx_hadd_pd_256;
4100       break;
4101 
4102     case Intrinsic::x86_avx_hsub_ps_256:
4103       shadowIntrinsicID = Intrinsic::x86_avx_hadd_ps_256;
4104       break;
4105 
4106     case Intrinsic::x86_avx2_phsub_d:
4107       shadowIntrinsicID = Intrinsic::x86_avx2_phadd_d;
4108       break;
4109 
4110     case Intrinsic::x86_avx2_phsub_w:
4111       shadowIntrinsicID = Intrinsic::x86_avx2_phadd_w;
4112       break;
4113 
4114     case Intrinsic::x86_avx2_phsub_sw:
4115       shadowIntrinsicID = Intrinsic::x86_avx2_phadd_sw;
4116       break;
4117 
4118     default:
4119       break;
4120     }
4121 
4122     return handleIntrinsicByApplyingToShadow(I, shadowIntrinsicID,
4123                                              /*trailingVerbatimArgs*/ 0);
4124   }
4125 
4126   /// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
4127   /// and vst{2,3,4}lane).
4128   ///
4129   /// Arm NEON vector store intrinsics have the output address (pointer) as the
4130   /// last argument, with the initial arguments being the inputs (and lane
4131   /// number for vst{2,3,4}lane). They return void.
4132   ///
4133   /// - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
4134   ///   abcdabcdabcdabcd... into *outP
4135   /// - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
4136   ///   writes aaaa...bbbb...cccc...dddd... into *outP
4137   /// - st4lane has arguments of (inA, inB, inC, inD, lane, outP)
4138   /// These instructions can all be instrumented with essentially the same
4139   /// MSan logic, simply by applying the corresponding intrinsic to the shadow.
4140   void handleNEONVectorStoreIntrinsic(IntrinsicInst &I, bool useLane) {
4141     IRBuilder<> IRB(&I);
4142 
4143     // Don't use getNumOperands() because it includes the callee
4144     int numArgOperands = I.arg_size();
4145 
4146     // The last arg operand is the output (pointer)
4147     assert(numArgOperands >= 1);
4148     Value *Addr = I.getArgOperand(numArgOperands - 1);
4149     assert(Addr->getType()->isPointerTy());
4150     int skipTrailingOperands = 1;
4151 
4152     if (ClCheckAccessAddress)
4153       insertShadowCheck(Addr, &I);
4154 
4155     // Second-last operand is the lane number (for vst{2,3,4}lane)
4156     if (useLane) {
4157       skipTrailingOperands++;
4158       assert(numArgOperands >= static_cast<int>(skipTrailingOperands));
4159       assert(isa<IntegerType>(
4160           I.getArgOperand(numArgOperands - skipTrailingOperands)->getType()));
4161     }
4162 
4163     SmallVector<Value *, 8> ShadowArgs;
4164     // All the initial operands are the inputs
4165     for (int i = 0; i < numArgOperands - skipTrailingOperands; i++) {
4166       assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
4167       Value *Shadow = getShadow(&I, i);
4168       ShadowArgs.append(1, Shadow);
4169     }
4170 
4171     // MSan's getShadowTy assumes the LHS is the type we want the shadow for
4172     // e.g., for:
4173     //     [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
4174     // we know the type of the output (and its shadow) is <16 x i8>.
4175     //
4176     // Arm NEON VST is unusual because the last argument is the output address:
4177     //     define void @st2_16b(<16 x i8> %A, <16 x i8> %B, ptr %P) {
4178     //         call void @llvm.aarch64.neon.st2.v16i8.p0
4179     //                   (<16 x i8> [[A]], <16 x i8> [[B]], ptr [[P]])
4180     // and we have no type information about P's operand. We must manually
4181     // compute the type (<16 x i8> x 2).
4182     FixedVectorType *OutputVectorTy = FixedVectorType::get(
4183         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getElementType(),
4184         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements() *
4185             (numArgOperands - skipTrailingOperands));
4186     Type *OutputShadowTy = getShadowTy(OutputVectorTy);
4187 
4188     if (useLane)
4189       ShadowArgs.append(1,
4190                         I.getArgOperand(numArgOperands - skipTrailingOperands));
4191 
4192     Value *OutputShadowPtr, *OutputOriginPtr;
4193     // AArch64 NEON does not need alignment (unless the OS requires it)
4194     std::tie(OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr(
4195         Addr, IRB, OutputShadowTy, Align(1), /*isStore*/ true);
4196     ShadowArgs.append(1, OutputShadowPtr);
4197 
4198     CallInst *CI =
4199         IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
4200     setShadow(&I, CI);
4201 
4202     if (MS.TrackOrigins) {
4203       // TODO: if we modelled the vst* instruction more precisely, we could
4204       // more accurately track the origins (e.g., if both inputs are
4205       // uninitialized for vst2, we currently blame the second input, even
4206       // though part of the output depends only on the first input).
4207       //
4208       // This is particularly imprecise for vst{2,3,4}lane, since only one
4209       // lane of each input is actually copied to the output.
4210       OriginCombiner OC(this, IRB);
4211       for (int i = 0; i < numArgOperands - skipTrailingOperands; i++)
4212         OC.Add(I.getArgOperand(i));
4213 
4214       const DataLayout &DL = F.getDataLayout();
4215       OC.DoneAndStoreOrigin(DL.getTypeStoreSize(OutputVectorTy),
4216                             OutputOriginPtr);
4217     }
4218   }
4219 
4220   /// Handle intrinsics by applying the intrinsic to the shadows.
4221   ///
4222   /// The trailing arguments are passed verbatim to the intrinsic, though any
4223   /// uninitialized trailing arguments can also taint the shadow e.g., for an
4224   /// intrinsic with one trailing verbatim argument:
4225   ///     out = intrinsic(var1, var2, opType)
4226   /// we compute:
4227   ///     shadow[out] =
4228   ///         intrinsic(shadow[var1], shadow[var2], opType) | shadow[opType]
4229   ///
4230   /// Typically, shadowIntrinsicID will be specified by the caller to be
4231   /// I.getIntrinsicID(), but the caller can choose to replace it with another
4232   /// intrinsic of the same type.
4233   ///
4234   /// CAUTION: this assumes that the intrinsic will handle arbitrary
4235   ///          bit-patterns (for example, if the intrinsic accepts floats for
4236   ///          var1, we require that it doesn't care if inputs are NaNs).
4237   ///
4238   /// For example, this can be applied to the Arm NEON vector table intrinsics
4239   /// (tbl{1,2,3,4}).
4240   ///
4241   /// The origin is approximated using setOriginForNaryOp.
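  ///
  /// As a concrete sketch, for the NEON tbl1 case below (one trailing
  /// verbatim argument, the index register):
  ///     shadow[out] = tbl1(shadow[table], index) | shadow[index]
  /// with bitcasts inserted as needed between shadow and operand types.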
4242   void handleIntrinsicByApplyingToShadow(IntrinsicInst &I,
4243                                          Intrinsic::ID shadowIntrinsicID,
4244                                          unsigned int trailingVerbatimArgs) {
4245     IRBuilder<> IRB(&I);
4246 
4247     assert(trailingVerbatimArgs < I.arg_size());
4248 
4249     SmallVector<Value *, 8> ShadowArgs;
4250     // Don't use getNumOperands() because it includes the callee
4251     for (unsigned int i = 0; i < I.arg_size() - trailingVerbatimArgs; i++) {
4252       Value *Shadow = getShadow(&I, i);
4253 
4254       // Shadows are integer-ish types but some intrinsics require a
4255       // different (e.g., floating-point) type.
4256       ShadowArgs.push_back(
4257           IRB.CreateBitCast(Shadow, I.getArgOperand(i)->getType()));
4258     }
4259 
4260     for (unsigned int i = I.arg_size() - trailingVerbatimArgs; i < I.arg_size();
4261          i++) {
4262       Value *Arg = I.getArgOperand(i);
4263       ShadowArgs.push_back(Arg);
4264     }
4265 
4266     CallInst *CI =
4267         IRB.CreateIntrinsic(I.getType(), shadowIntrinsicID, ShadowArgs);
4268     Value *CombinedShadow = CI;
4269 
4270     // Combine the computed shadow with the shadow of trailing args
4271     for (unsigned int i = I.arg_size() - trailingVerbatimArgs; i < I.arg_size();
4272          i++) {
4273       Value *Shadow =
4274           CreateShadowCast(IRB, getShadow(&I, i), CombinedShadow->getType());
4275       CombinedShadow = IRB.CreateOr(Shadow, CombinedShadow, "_msprop");
4276     }
4277 
4278     setShadow(&I, IRB.CreateBitCast(CombinedShadow, getShadowTy(&I)));
4279 
4280     setOriginForNaryOp(I);
4281   }
4282 
4283   // Approximation only
4284   void handleNEONVectorMultiplyIntrinsic(IntrinsicInst &I) {
4285     handleShadowOr(I);
4286   }
4287 
4288   void visitIntrinsicInst(IntrinsicInst &I) {
4289     switch (I.getIntrinsicID()) {
4290     case Intrinsic::uadd_with_overflow:
4291     case Intrinsic::sadd_with_overflow:
4292     case Intrinsic::usub_with_overflow:
4293     case Intrinsic::ssub_with_overflow:
4294     case Intrinsic::umul_with_overflow:
4295     case Intrinsic::smul_with_overflow:
4296       handleArithmeticWithOverflow(I);
4297       break;
4298     case Intrinsic::abs:
4299       handleAbsIntrinsic(I);
4300       break;
4301     case Intrinsic::is_fpclass:
4302       handleIsFpClass(I);
4303       break;
4304     case Intrinsic::lifetime_start:
4305       handleLifetimeStart(I);
4306       break;
4307     case Intrinsic::launder_invariant_group:
4308     case Intrinsic::strip_invariant_group:
4309       handleInvariantGroup(I);
4310       break;
4311     case Intrinsic::bswap:
4312       handleBswap(I);
4313       break;
4314     case Intrinsic::ctlz:
4315     case Intrinsic::cttz:
4316       handleCountZeroes(I);
4317       break;
4318     case Intrinsic::masked_compressstore:
4319       handleMaskedCompressStore(I);
4320       break;
4321     case Intrinsic::masked_expandload:
4322       handleMaskedExpandLoad(I);
4323       break;
4324     case Intrinsic::masked_gather:
4325       handleMaskedGather(I);
4326       break;
4327     case Intrinsic::masked_scatter:
4328       handleMaskedScatter(I);
4329       break;
4330     case Intrinsic::masked_store:
4331       handleMaskedStore(I);
4332       break;
4333     case Intrinsic::masked_load:
4334       handleMaskedLoad(I);
4335       break;
4336     case Intrinsic::vector_reduce_and:
4337       handleVectorReduceAndIntrinsic(I);
4338       break;
4339     case Intrinsic::vector_reduce_or:
4340       handleVectorReduceOrIntrinsic(I);
4341       break;
4342     case Intrinsic::vector_reduce_add:
4343     case Intrinsic::vector_reduce_xor:
4344     case Intrinsic::vector_reduce_mul:
4345       handleVectorReduceIntrinsic(I);
4346       break;
4347     case Intrinsic::x86_sse_stmxcsr:
4348       handleStmxcsr(I);
4349       break;
4350     case Intrinsic::x86_sse_ldmxcsr:
4351       handleLdmxcsr(I);
4352       break;
4353     case Intrinsic::x86_avx512_vcvtsd2usi64:
4354     case Intrinsic::x86_avx512_vcvtsd2usi32:
4355     case Intrinsic::x86_avx512_vcvtss2usi64:
4356     case Intrinsic::x86_avx512_vcvtss2usi32:
4357     case Intrinsic::x86_avx512_cvttss2usi64:
4358     case Intrinsic::x86_avx512_cvttss2usi:
4359     case Intrinsic::x86_avx512_cvttsd2usi64:
4360     case Intrinsic::x86_avx512_cvttsd2usi:
4361     case Intrinsic::x86_avx512_cvtusi2ss:
4362     case Intrinsic::x86_avx512_cvtusi642sd:
4363     case Intrinsic::x86_avx512_cvtusi642ss:
4364       handleVectorConvertIntrinsic(I, 1, true);
4365       break;
4366     case Intrinsic::x86_sse2_cvtsd2si64:
4367     case Intrinsic::x86_sse2_cvtsd2si:
4368     case Intrinsic::x86_sse2_cvtsd2ss:
4369     case Intrinsic::x86_sse2_cvttsd2si64:
4370     case Intrinsic::x86_sse2_cvttsd2si:
4371     case Intrinsic::x86_sse_cvtss2si64:
4372     case Intrinsic::x86_sse_cvtss2si:
4373     case Intrinsic::x86_sse_cvttss2si64:
4374     case Intrinsic::x86_sse_cvttss2si:
4375       handleVectorConvertIntrinsic(I, 1);
4376       break;
4377     case Intrinsic::x86_sse_cvtps2pi:
4378     case Intrinsic::x86_sse_cvttps2pi:
4379       handleVectorConvertIntrinsic(I, 2);
4380       break;
4381 
4382     case Intrinsic::x86_avx512_psll_w_512:
4383     case Intrinsic::x86_avx512_psll_d_512:
4384     case Intrinsic::x86_avx512_psll_q_512:
4385     case Intrinsic::x86_avx512_pslli_w_512:
4386     case Intrinsic::x86_avx512_pslli_d_512:
4387     case Intrinsic::x86_avx512_pslli_q_512:
4388     case Intrinsic::x86_avx512_psrl_w_512:
4389     case Intrinsic::x86_avx512_psrl_d_512:
4390     case Intrinsic::x86_avx512_psrl_q_512:
4391     case Intrinsic::x86_avx512_psra_w_512:
4392     case Intrinsic::x86_avx512_psra_d_512:
4393     case Intrinsic::x86_avx512_psra_q_512:
4394     case Intrinsic::x86_avx512_psrli_w_512:
4395     case Intrinsic::x86_avx512_psrli_d_512:
4396     case Intrinsic::x86_avx512_psrli_q_512:
4397     case Intrinsic::x86_avx512_psrai_w_512:
4398     case Intrinsic::x86_avx512_psrai_d_512:
4399     case Intrinsic::x86_avx512_psrai_q_512:
4400     case Intrinsic::x86_avx512_psra_q_256:
4401     case Intrinsic::x86_avx512_psra_q_128:
4402     case Intrinsic::x86_avx512_psrai_q_256:
4403     case Intrinsic::x86_avx512_psrai_q_128:
4404     case Intrinsic::x86_avx2_psll_w:
4405     case Intrinsic::x86_avx2_psll_d:
4406     case Intrinsic::x86_avx2_psll_q:
4407     case Intrinsic::x86_avx2_pslli_w:
4408     case Intrinsic::x86_avx2_pslli_d:
4409     case Intrinsic::x86_avx2_pslli_q:
4410     case Intrinsic::x86_avx2_psrl_w:
4411     case Intrinsic::x86_avx2_psrl_d:
4412     case Intrinsic::x86_avx2_psrl_q:
4413     case Intrinsic::x86_avx2_psra_w:
4414     case Intrinsic::x86_avx2_psra_d:
4415     case Intrinsic::x86_avx2_psrli_w:
4416     case Intrinsic::x86_avx2_psrli_d:
4417     case Intrinsic::x86_avx2_psrli_q:
4418     case Intrinsic::x86_avx2_psrai_w:
4419     case Intrinsic::x86_avx2_psrai_d:
4420     case Intrinsic::x86_sse2_psll_w:
4421     case Intrinsic::x86_sse2_psll_d:
4422     case Intrinsic::x86_sse2_psll_q:
4423     case Intrinsic::x86_sse2_pslli_w:
4424     case Intrinsic::x86_sse2_pslli_d:
4425     case Intrinsic::x86_sse2_pslli_q:
4426     case Intrinsic::x86_sse2_psrl_w:
4427     case Intrinsic::x86_sse2_psrl_d:
4428     case Intrinsic::x86_sse2_psrl_q:
4429     case Intrinsic::x86_sse2_psra_w:
4430     case Intrinsic::x86_sse2_psra_d:
4431     case Intrinsic::x86_sse2_psrli_w:
4432     case Intrinsic::x86_sse2_psrli_d:
4433     case Intrinsic::x86_sse2_psrli_q:
4434     case Intrinsic::x86_sse2_psrai_w:
4435     case Intrinsic::x86_sse2_psrai_d:
4436     case Intrinsic::x86_mmx_psll_w:
4437     case Intrinsic::x86_mmx_psll_d:
4438     case Intrinsic::x86_mmx_psll_q:
4439     case Intrinsic::x86_mmx_pslli_w:
4440     case Intrinsic::x86_mmx_pslli_d:
4441     case Intrinsic::x86_mmx_pslli_q:
4442     case Intrinsic::x86_mmx_psrl_w:
4443     case Intrinsic::x86_mmx_psrl_d:
4444     case Intrinsic::x86_mmx_psrl_q:
4445     case Intrinsic::x86_mmx_psra_w:
4446     case Intrinsic::x86_mmx_psra_d:
4447     case Intrinsic::x86_mmx_psrli_w:
4448     case Intrinsic::x86_mmx_psrli_d:
4449     case Intrinsic::x86_mmx_psrli_q:
4450     case Intrinsic::x86_mmx_psrai_w:
4451     case Intrinsic::x86_mmx_psrai_d:
4452     case Intrinsic::aarch64_neon_rshrn:
4453     case Intrinsic::aarch64_neon_sqrshl:
4454     case Intrinsic::aarch64_neon_sqrshrn:
4455     case Intrinsic::aarch64_neon_sqrshrun:
4456     case Intrinsic::aarch64_neon_sqshl:
4457     case Intrinsic::aarch64_neon_sqshlu:
4458     case Intrinsic::aarch64_neon_sqshrn:
4459     case Intrinsic::aarch64_neon_sqshrun:
4460     case Intrinsic::aarch64_neon_srshl:
4461     case Intrinsic::aarch64_neon_sshl:
4462     case Intrinsic::aarch64_neon_uqrshl:
4463     case Intrinsic::aarch64_neon_uqrshrn:
4464     case Intrinsic::aarch64_neon_uqshl:
4465     case Intrinsic::aarch64_neon_uqshrn:
4466     case Intrinsic::aarch64_neon_urshl:
4467     case Intrinsic::aarch64_neon_ushl:
4468       // Not handled here: aarch64_neon_vsli (vector shift left and insert)
4469       handleVectorShiftIntrinsic(I, /* Variable */ false);
4470       break;
4471     case Intrinsic::x86_avx2_psllv_d:
4472     case Intrinsic::x86_avx2_psllv_d_256:
4473     case Intrinsic::x86_avx512_psllv_d_512:
4474     case Intrinsic::x86_avx2_psllv_q:
4475     case Intrinsic::x86_avx2_psllv_q_256:
4476     case Intrinsic::x86_avx512_psllv_q_512:
4477     case Intrinsic::x86_avx2_psrlv_d:
4478     case Intrinsic::x86_avx2_psrlv_d_256:
4479     case Intrinsic::x86_avx512_psrlv_d_512:
4480     case Intrinsic::x86_avx2_psrlv_q:
4481     case Intrinsic::x86_avx2_psrlv_q_256:
4482     case Intrinsic::x86_avx512_psrlv_q_512:
4483     case Intrinsic::x86_avx2_psrav_d:
4484     case Intrinsic::x86_avx2_psrav_d_256:
4485     case Intrinsic::x86_avx512_psrav_d_512:
4486     case Intrinsic::x86_avx512_psrav_q_128:
4487     case Intrinsic::x86_avx512_psrav_q_256:
4488     case Intrinsic::x86_avx512_psrav_q_512:
4489       handleVectorShiftIntrinsic(I, /* Variable */ true);
4490       break;
4491 
4492     case Intrinsic::x86_sse2_packsswb_128:
4493     case Intrinsic::x86_sse2_packssdw_128:
4494     case Intrinsic::x86_sse2_packuswb_128:
4495     case Intrinsic::x86_sse41_packusdw:
4496     case Intrinsic::x86_avx2_packsswb:
4497     case Intrinsic::x86_avx2_packssdw:
4498     case Intrinsic::x86_avx2_packuswb:
4499     case Intrinsic::x86_avx2_packusdw:
4500       handleVectorPackIntrinsic(I);
4501       break;
4502 
4503     case Intrinsic::x86_sse41_pblendvb:
4504     case Intrinsic::x86_sse41_blendvpd:
4505     case Intrinsic::x86_sse41_blendvps:
4506     case Intrinsic::x86_avx_blendv_pd_256:
4507     case Intrinsic::x86_avx_blendv_ps_256:
4508     case Intrinsic::x86_avx2_pblendvb:
4509       handleBlendvIntrinsic(I);
4510       break;
4511 
4512     case Intrinsic::x86_avx_dp_ps_256:
4513     case Intrinsic::x86_sse41_dppd:
4514     case Intrinsic::x86_sse41_dpps:
4515       handleDppIntrinsic(I);
4516       break;
4517 
4518     case Intrinsic::x86_mmx_packsswb:
4519     case Intrinsic::x86_mmx_packuswb:
4520       handleVectorPackIntrinsic(I, 16);
4521       break;
4522 
4523     case Intrinsic::x86_mmx_packssdw:
4524       handleVectorPackIntrinsic(I, 32);
4525       break;
4526 
4527     case Intrinsic::x86_mmx_psad_bw:
4528       handleVectorSadIntrinsic(I, true);
4529       break;
4530     case Intrinsic::x86_sse2_psad_bw:
4531     case Intrinsic::x86_avx2_psad_bw:
4532       handleVectorSadIntrinsic(I);
4533       break;
4534 
4535     case Intrinsic::x86_sse2_pmadd_wd:
4536     case Intrinsic::x86_avx2_pmadd_wd:
4537     case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
4538     case Intrinsic::x86_avx2_pmadd_ub_sw:
4539       handleVectorPmaddIntrinsic(I);
4540       break;
4541 
4542     case Intrinsic::x86_ssse3_pmadd_ub_sw:
4543       handleVectorPmaddIntrinsic(I, 8);
4544       break;
4545 
4546     case Intrinsic::x86_mmx_pmadd_wd:
4547       handleVectorPmaddIntrinsic(I, 16);
4548       break;
4549 
4550     case Intrinsic::x86_sse_cmp_ss:
4551     case Intrinsic::x86_sse2_cmp_sd:
4552     case Intrinsic::x86_sse_comieq_ss:
4553     case Intrinsic::x86_sse_comilt_ss:
4554     case Intrinsic::x86_sse_comile_ss:
4555     case Intrinsic::x86_sse_comigt_ss:
4556     case Intrinsic::x86_sse_comige_ss:
4557     case Intrinsic::x86_sse_comineq_ss:
4558     case Intrinsic::x86_sse_ucomieq_ss:
4559     case Intrinsic::x86_sse_ucomilt_ss:
4560     case Intrinsic::x86_sse_ucomile_ss:
4561     case Intrinsic::x86_sse_ucomigt_ss:
4562     case Intrinsic::x86_sse_ucomige_ss:
4563     case Intrinsic::x86_sse_ucomineq_ss:
4564     case Intrinsic::x86_sse2_comieq_sd:
4565     case Intrinsic::x86_sse2_comilt_sd:
4566     case Intrinsic::x86_sse2_comile_sd:
4567     case Intrinsic::x86_sse2_comigt_sd:
4568     case Intrinsic::x86_sse2_comige_sd:
4569     case Intrinsic::x86_sse2_comineq_sd:
4570     case Intrinsic::x86_sse2_ucomieq_sd:
4571     case Intrinsic::x86_sse2_ucomilt_sd:
4572     case Intrinsic::x86_sse2_ucomile_sd:
4573     case Intrinsic::x86_sse2_ucomigt_sd:
4574     case Intrinsic::x86_sse2_ucomige_sd:
4575     case Intrinsic::x86_sse2_ucomineq_sd:
4576       handleVectorCompareScalarIntrinsic(I);
4577       break;
4578 
4579     case Intrinsic::x86_avx_cmp_pd_256:
4580     case Intrinsic::x86_avx_cmp_ps_256:
4581     case Intrinsic::x86_sse2_cmp_pd:
4582     case Intrinsic::x86_sse_cmp_ps:
4583       handleVectorComparePackedIntrinsic(I);
4584       break;
4585 
4586     case Intrinsic::x86_bmi_bextr_32:
4587     case Intrinsic::x86_bmi_bextr_64:
4588     case Intrinsic::x86_bmi_bzhi_32:
4589     case Intrinsic::x86_bmi_bzhi_64:
4590     case Intrinsic::x86_bmi_pdep_32:
4591     case Intrinsic::x86_bmi_pdep_64:
4592     case Intrinsic::x86_bmi_pext_32:
4593     case Intrinsic::x86_bmi_pext_64:
4594       handleBmiIntrinsic(I);
4595       break;
4596 
4597     case Intrinsic::x86_pclmulqdq:
4598     case Intrinsic::x86_pclmulqdq_256:
4599     case Intrinsic::x86_pclmulqdq_512:
4600       handlePclmulIntrinsic(I);
4601       break;
4602 
4603     case Intrinsic::x86_avx_round_pd_256:
4604     case Intrinsic::x86_avx_round_ps_256:
4605     case Intrinsic::x86_sse41_round_pd:
4606     case Intrinsic::x86_sse41_round_ps:
4607       handleRoundPdPsIntrinsic(I);
4608       break;
4609 
4610     case Intrinsic::x86_sse41_round_sd:
4611     case Intrinsic::x86_sse41_round_ss:
4612       handleUnarySdSsIntrinsic(I);
4613       break;
4614 
4615     case Intrinsic::x86_sse2_max_sd:
4616     case Intrinsic::x86_sse_max_ss:
4617     case Intrinsic::x86_sse2_min_sd:
4618     case Intrinsic::x86_sse_min_ss:
4619       handleBinarySdSsIntrinsic(I);
4620       break;
4621 
4622     case Intrinsic::x86_avx_vtestc_pd:
4623     case Intrinsic::x86_avx_vtestc_pd_256:
4624     case Intrinsic::x86_avx_vtestc_ps:
4625     case Intrinsic::x86_avx_vtestc_ps_256:
4626     case Intrinsic::x86_avx_vtestnzc_pd:
4627     case Intrinsic::x86_avx_vtestnzc_pd_256:
4628     case Intrinsic::x86_avx_vtestnzc_ps:
4629     case Intrinsic::x86_avx_vtestnzc_ps_256:
4630     case Intrinsic::x86_avx_vtestz_pd:
4631     case Intrinsic::x86_avx_vtestz_pd_256:
4632     case Intrinsic::x86_avx_vtestz_ps:
4633     case Intrinsic::x86_avx_vtestz_ps_256:
4634     case Intrinsic::x86_avx_ptestc_256:
4635     case Intrinsic::x86_avx_ptestnzc_256:
4636     case Intrinsic::x86_avx_ptestz_256:
4637     case Intrinsic::x86_sse41_ptestc:
4638     case Intrinsic::x86_sse41_ptestnzc:
4639     case Intrinsic::x86_sse41_ptestz:
4640       handleVtestIntrinsic(I);
4641       break;
4642 
4643     case Intrinsic::x86_sse3_hadd_ps:
4644     case Intrinsic::x86_sse3_hadd_pd:
4645     case Intrinsic::x86_ssse3_phadd_d:
4646     case Intrinsic::x86_ssse3_phadd_d_128:
4647     case Intrinsic::x86_ssse3_phadd_w:
4648     case Intrinsic::x86_ssse3_phadd_w_128:
4649     case Intrinsic::x86_ssse3_phadd_sw:
4650     case Intrinsic::x86_ssse3_phadd_sw_128:
4651     case Intrinsic::x86_avx_hadd_pd_256:
4652     case Intrinsic::x86_avx_hadd_ps_256:
4653     case Intrinsic::x86_avx2_phadd_d:
4654     case Intrinsic::x86_avx2_phadd_w:
4655     case Intrinsic::x86_avx2_phadd_sw:
4656     case Intrinsic::x86_sse3_hsub_ps:
4657     case Intrinsic::x86_sse3_hsub_pd:
4658     case Intrinsic::x86_ssse3_phsub_d:
4659     case Intrinsic::x86_ssse3_phsub_d_128:
4660     case Intrinsic::x86_ssse3_phsub_w:
4661     case Intrinsic::x86_ssse3_phsub_w_128:
4662     case Intrinsic::x86_ssse3_phsub_sw:
4663     case Intrinsic::x86_ssse3_phsub_sw_128:
4664     case Intrinsic::x86_avx_hsub_pd_256:
4665     case Intrinsic::x86_avx_hsub_ps_256:
4666     case Intrinsic::x86_avx2_phsub_d:
4667     case Intrinsic::x86_avx2_phsub_w:
4668     case Intrinsic::x86_avx2_phsub_sw: {
4669       handleAVXHorizontalAddSubIntrinsic(I);
4670       break;
4671     }
4672 
4673     case Intrinsic::x86_avx_maskstore_ps:
4674     case Intrinsic::x86_avx_maskstore_pd:
4675     case Intrinsic::x86_avx_maskstore_ps_256:
4676     case Intrinsic::x86_avx_maskstore_pd_256:
4677     case Intrinsic::x86_avx2_maskstore_d:
4678     case Intrinsic::x86_avx2_maskstore_q:
4679     case Intrinsic::x86_avx2_maskstore_d_256:
4680     case Intrinsic::x86_avx2_maskstore_q_256: {
4681       handleAVXMaskedStore(I);
4682       break;
4683     }
4684 
4685     case Intrinsic::x86_avx_maskload_ps:
4686     case Intrinsic::x86_avx_maskload_pd:
4687     case Intrinsic::x86_avx_maskload_ps_256:
4688     case Intrinsic::x86_avx_maskload_pd_256:
4689     case Intrinsic::x86_avx2_maskload_d:
4690     case Intrinsic::x86_avx2_maskload_q:
4691     case Intrinsic::x86_avx2_maskload_d_256:
4692     case Intrinsic::x86_avx2_maskload_q_256: {
4693       handleAVXMaskedLoad(I);
4694       break;
4695     }
4696 
4697     // Packed
4698     case Intrinsic::x86_avx512_min_ps_512:
4699     case Intrinsic::x86_avx512_min_pd_512:
4700     case Intrinsic::x86_avx512_max_ps_512:
4701     case Intrinsic::x86_avx512_max_pd_512: {
4702       // These AVX512 variants contain the rounding mode as a trailing flag.
4703       // Earlier variants do not have a trailing flag and are already handled
4704       // by maybeHandleSimpleNomemIntrinsic(I, 0) via handleUnknownIntrinsic.
4705       [[maybe_unused]] bool Success =
4706           maybeHandleSimpleNomemIntrinsic(I, /*trailingFlags=*/1);
4707       assert(Success);
4708       break;
4709     }
4710 
4711     case Intrinsic::fshl:
4712     case Intrinsic::fshr:
4713       handleFunnelShift(I);
4714       break;
4715 
4716     case Intrinsic::is_constant:
4717       // The result of llvm.is.constant() is always defined.
4718       setShadow(&I, getCleanShadow(&I));
4719       setOrigin(&I, getCleanOrigin());
4720       break;
4721 
4722     case Intrinsic::aarch64_neon_st1x2:
4723     case Intrinsic::aarch64_neon_st1x3:
4724     case Intrinsic::aarch64_neon_st1x4:
4725     case Intrinsic::aarch64_neon_st2:
4726     case Intrinsic::aarch64_neon_st3:
4727     case Intrinsic::aarch64_neon_st4: {
4728       handleNEONVectorStoreIntrinsic(I, false);
4729       break;
4730     }
4731 
4732     case Intrinsic::aarch64_neon_st2lane:
4733     case Intrinsic::aarch64_neon_st3lane:
4734     case Intrinsic::aarch64_neon_st4lane: {
4735       handleNEONVectorStoreIntrinsic(I, true);
4736       break;
4737     }
4738 
4739     // Arm NEON vector table intrinsics have the source/table register(s) as
4740     // arguments, followed by the index register. They return the output.
4741     //
4742     // 'TBL writes a zero if an index is out-of-range, while TBX leaves the
4743     //  original value unchanged in the destination register.'
4744     // Conveniently, zero denotes a clean shadow, which means out-of-range
4745     // indices for TBL will initialize the user data with zero and also clean
4746     // the shadow. (For TBX, neither the user data nor the shadow will be
4747     // updated, which is also correct.)
4748     case Intrinsic::aarch64_neon_tbl1:
4749     case Intrinsic::aarch64_neon_tbl2:
4750     case Intrinsic::aarch64_neon_tbl3:
4751     case Intrinsic::aarch64_neon_tbl4:
4752     case Intrinsic::aarch64_neon_tbx1:
4753     case Intrinsic::aarch64_neon_tbx2:
4754     case Intrinsic::aarch64_neon_tbx3:
4755     case Intrinsic::aarch64_neon_tbx4: {
4756       // The last trailing argument (index register) should be handled verbatim
4757       handleIntrinsicByApplyingToShadow(
4758           I, /*shadowIntrinsicID=*/I.getIntrinsicID(),
4759           /*trailingVerbatimArgs*/ 1);
4760       break;
4761     }
4762 
4763     case Intrinsic::aarch64_neon_fmulx:
4764     case Intrinsic::aarch64_neon_pmul:
4765     case Intrinsic::aarch64_neon_pmull:
4766     case Intrinsic::aarch64_neon_smull:
4767     case Intrinsic::aarch64_neon_pmull64:
4768     case Intrinsic::aarch64_neon_umull: {
4769       handleNEONVectorMultiplyIntrinsic(I);
4770       break;
4771     }
4772 
4773     default:
4774       if (!handleUnknownIntrinsic(I))
4775         visitInstruction(I);
4776       break;
4777     }
4778   }
4779 
4780   void visitLibAtomicLoad(CallBase &CB) {
4781     // Since we use getNextNode here, we can't have CB terminate the BB.
4782     assert(isa<CallInst>(CB));
4783 
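    // The generic libatomic entry point is assumed to have the signature
    //   void __atomic_load(size_t size, void *src, void *dst, int ordering);
    // hence argument 0 is the size, 1 the source, 2 the destination, and 3
    // the memory ordering.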
4784     IRBuilder<> IRB(&CB);
4785     Value *Size = CB.getArgOperand(0);
4786     Value *SrcPtr = CB.getArgOperand(1);
4787     Value *DstPtr = CB.getArgOperand(2);
4788     Value *Ordering = CB.getArgOperand(3);
4789     // Convert the call to have at least Acquire ordering to make sure
4790     // the shadow operations aren't reordered before it.
4791     Value *NewOrdering =
4792         IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
4793     CB.setArgOperand(3, NewOrdering);
4794 
4795     NextNodeIRBuilder NextIRB(&CB);
4796     Value *SrcShadowPtr, *SrcOriginPtr;
4797     std::tie(SrcShadowPtr, SrcOriginPtr) =
4798         getShadowOriginPtr(SrcPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
4799                            /*isStore*/ false);
4800     Value *DstShadowPtr =
4801         getShadowOriginPtr(DstPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
4802                            /*isStore*/ true)
4803             .first;
4804 
4805     NextIRB.CreateMemCpy(DstShadowPtr, Align(1), SrcShadowPtr, Align(1), Size);
4806     if (MS.TrackOrigins) {
4807       Value *SrcOrigin = NextIRB.CreateAlignedLoad(MS.OriginTy, SrcOriginPtr,
4808                                                    kMinOriginAlignment);
4809       Value *NewOrigin = updateOrigin(SrcOrigin, NextIRB);
4810       NextIRB.CreateCall(MS.MsanSetOriginFn, {DstPtr, Size, NewOrigin});
4811     }
4812   }
4813 
4814   void visitLibAtomicStore(CallBase &CB) {
4815     IRBuilder<> IRB(&CB);
4816     Value *Size = CB.getArgOperand(0);
4817     Value *DstPtr = CB.getArgOperand(2);
4818     Value *Ordering = CB.getArgOperand(3);
4819     // Convert the call to have at least Release ordering to make sure
4820     // the shadow operations aren't reordered after it.
4821     Value *NewOrdering =
4822         IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
4823     CB.setArgOperand(3, NewOrdering);
4824 
4825     Value *DstShadowPtr =
4826         getShadowOriginPtr(DstPtr, IRB, IRB.getInt8Ty(), Align(1),
4827                            /*isStore*/ true)
4828             .first;
4829 
4830     // Atomic store always paints clean shadow/origin. See file header.
4831     IRB.CreateMemSet(DstShadowPtr, getCleanShadow(IRB.getInt8Ty()), Size,
4832                      Align(1));
4833   }
4834 
4835   void visitCallBase(CallBase &CB) {
4836     assert(!CB.getMetadata(LLVMContext::MD_nosanitize));
4837     if (CB.isInlineAsm()) {
4838       // For inline asm (either a call to asm function, or callbr instruction),
4839       // do the usual thing: check argument shadow and mark all outputs as
4840       // clean. Note that any side effects of the inline asm that are not
4841       // immediately visible in its constraints are not handled.
4842       if (ClHandleAsmConservative)
4843         visitAsmInstruction(CB);
4844       else
4845         visitInstruction(CB);
4846       return;
4847     }
4848     LibFunc LF;
4849     if (TLI->getLibFunc(CB, LF)) {
4850       // libatomic.a functions need to have special handling because there isn't
4851       // a good way to intercept them or compile the library with
4852       // instrumentation.
4853       switch (LF) {
4854       case LibFunc_atomic_load:
4855         if (!isa<CallInst>(CB)) {
4856           llvm::errs() << "MSAN -- cannot instrument invoke of libatomic load. "
4857                           "Ignoring!\n";
4858           break;
4859         }
4860         visitLibAtomicLoad(CB);
4861         return;
4862       case LibFunc_atomic_store:
4863         visitLibAtomicStore(CB);
4864         return;
4865       default:
4866         break;
4867       }
4868     }
4869 
4870     if (auto *Call = dyn_cast<CallInst>(&CB)) {
4871       assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");
4872 
4873       // We are going to insert code that relies on the fact that the callee
4874       // will become a non-readonly function after it is instrumented by us. To
4875       // prevent this code from being optimized out, mark that function
4876       // non-readonly in advance.
4877       // TODO: We can likely do better than dropping memory() completely here.
4878       AttributeMask B;
4879       B.addAttribute(Attribute::Memory).addAttribute(Attribute::Speculatable);
4880 
4881       Call->removeFnAttrs(B);
4882       if (Function *Func = Call->getCalledFunction()) {
4883         Func->removeFnAttrs(B);
4884       }
4885 
4886       maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
4887     }
4888     IRBuilder<> IRB(&CB);
4889     bool MayCheckCall = MS.EagerChecks;
4890     if (Function *Func = CB.getCalledFunction()) {
4891       // __sanitizer_unaligned_{load,store} functions may be called by users
4892       // and always expect shadows in the TLS, so don't check them.
4893       MayCheckCall &= !Func->getName().starts_with("__sanitizer_unaligned_");
4894     }
4895 
4896     unsigned ArgOffset = 0;
4897     LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n");
4898     for (const auto &[i, A] : llvm::enumerate(CB.args())) {
4899       if (!A->getType()->isSized()) {
4900         LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
4901         continue;
4902       }
4903 
4904       if (A->getType()->isScalableTy()) {
4905         LLVM_DEBUG(dbgs() << "Arg " << i << " is vscale: " << CB << "\n");
4906         // Handle as noundef, but don't reserve tls slots.
4907         insertShadowCheck(A, &CB);
4908         continue;
4909       }
4910 
4911       unsigned Size = 0;
4912       const DataLayout &DL = F.getDataLayout();
4913 
4914       bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
4915       bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
4916       bool EagerCheck = MayCheckCall && !ByVal && NoUndef;
4917 
4918       if (EagerCheck) {
4919         insertShadowCheck(A, &CB);
4920         Size = DL.getTypeAllocSize(A->getType());
4921       } else {
4922         Value *Store = nullptr;
4923         // Compute the Shadow for arg even if it is ByVal, because
4924         // in that case getShadow() will copy the actual arg shadow to
4925         // __msan_param_tls.
4926         Value *ArgShadow = getShadow(A);
4927         Value *ArgShadowBase = getShadowPtrForArgument(IRB, ArgOffset);
4928         LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
4929                           << " Shadow: " << *ArgShadow << "\n");
4930         if (ByVal) {
4931           // ByVal requires some special handling as it's too big for a single
4932           // load
4933           assert(A->getType()->isPointerTy() &&
4934                  "ByVal argument is not a pointer!");
4935           Size = DL.getTypeAllocSize(CB.getParamByValType(i));
4936           if (ArgOffset + Size > kParamTLSSize)
4937             break;
4938           const MaybeAlign ParamAlignment(CB.getParamAlign(i));
4939           MaybeAlign Alignment = std::nullopt;
4940           if (ParamAlignment)
4941             Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
4942           Value *AShadowPtr, *AOriginPtr;
4943           std::tie(AShadowPtr, AOriginPtr) =
4944               getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
4945                                  /*isStore*/ false);
4946           if (!PropagateShadow) {
4947             Store = IRB.CreateMemSet(ArgShadowBase,
4948                                      Constant::getNullValue(IRB.getInt8Ty()),
4949                                      Size, Alignment);
4950           } else {
4951             Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
4952                                      Alignment, Size);
4953             if (MS.TrackOrigins) {
4954               Value *ArgOriginBase = getOriginPtrForArgument(IRB, ArgOffset);
4955               // FIXME: OriginSize should be:
4956               // alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment)
4957               unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
4958               IRB.CreateMemCpy(
4959                   ArgOriginBase,
4960                   /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
4961                   AOriginPtr,
4962                   /* by getShadowOriginPtr */ kMinOriginAlignment, OriginSize);
4963             }
4964           }
4965         } else {
4966           // Any other parameters mean we need bit-grained tracking of uninit
4967           // data
4968           Size = DL.getTypeAllocSize(A->getType());
4969           if (ArgOffset + Size > kParamTLSSize)
4970             break;
4971           Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
4972                                          kShadowTLSAlignment);
4973           Constant *Cst = dyn_cast<Constant>(ArgShadow);
4974           if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) {
4975             IRB.CreateStore(getOrigin(A),
4976                             getOriginPtrForArgument(IRB, ArgOffset));
4977           }
4978         }
4979         (void)Store;
4980         assert(Store != nullptr);
4981         LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
4982       }
4983       assert(Size != 0);
4984       ArgOffset += alignTo(Size, kShadowTLSAlignment);
4985     }
4986     LLVM_DEBUG(dbgs() << "  done with call args\n");
4987 
4988     FunctionType *FT = CB.getFunctionType();
4989     if (FT->isVarArg()) {
4990       VAHelper->visitCallBase(CB, IRB);
4991     }
4992 
4993     // Now, get the shadow for the RetVal.
4994     if (!CB.getType()->isSized())
4995       return;
4996     // Don't emit the epilogue for musttail call returns.
4997     if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
4998       return;
4999 
5000     if (MayCheckCall && CB.hasRetAttr(Attribute::NoUndef)) {
5001       setShadow(&CB, getCleanShadow(&CB));
5002       setOrigin(&CB, getCleanOrigin());
5003       return;
5004     }
5005 
5006     IRBuilder<> IRBBefore(&CB);
5007     // Until we have full dynamic coverage, make sure the retval shadow is 0.
5008     Value *Base = getShadowPtrForRetval(IRBBefore);
5009     IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base,
5010                                  kShadowTLSAlignment);
5011     BasicBlock::iterator NextInsn;
5012     if (isa<CallInst>(CB)) {
5013       NextInsn = ++CB.getIterator();
5014       assert(NextInsn != CB.getParent()->end());
5015     } else {
5016       BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest();
5017       if (!NormalDest->getSinglePredecessor()) {
5018         // FIXME: this case is tricky, so we are just conservative here.
5019         // Perhaps we need to split the edge between this BB and NormalDest,
5020         // but a naive attempt to use SplitEdge leads to a crash.
5021         setShadow(&CB, getCleanShadow(&CB));
5022         setOrigin(&CB, getCleanOrigin());
5023         return;
5024       }
5025       // FIXME: NextInsn is likely in a basic block that has not been visited
5026       // yet. Anything inserted there will be instrumented by MSan later!
5027       NextInsn = NormalDest->getFirstInsertionPt();
5028       assert(NextInsn != NormalDest->end() &&
5029              "Could not find insertion point for retval shadow load");
5030     }
5031     IRBuilder<> IRBAfter(&*NextInsn);
5032     Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
5033         getShadowTy(&CB), getShadowPtrForRetval(IRBAfter), kShadowTLSAlignment,
5034         "_msret");
5035     setShadow(&CB, RetvalShadow);
5036     if (MS.TrackOrigins)
5037       setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy, getOriginPtrForRetval()));
5038   }
5039 
5040   bool isAMustTailRetVal(Value *RetVal) {
5041     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
5042       RetVal = I->getOperand(0);
5043     }
5044     if (auto *I = dyn_cast<CallInst>(RetVal)) {
5045       return I->isMustTailCall();
5046     }
5047     return false;
5048   }
5049 
5050   void visitReturnInst(ReturnInst &I) {
5051     IRBuilder<> IRB(&I);
5052     Value *RetVal = I.getReturnValue();
5053     if (!RetVal)
5054       return;
5055     // Don't emit the epilogue for musttail call returns.
5056     if (isAMustTailRetVal(RetVal))
5057       return;
5058     Value *ShadowPtr = getShadowPtrForRetval(IRB);
5059     bool HasNoUndef = F.hasRetAttribute(Attribute::NoUndef);
5060     bool StoreShadow = !(MS.EagerChecks && HasNoUndef);
5061     // FIXME: Consider using SpecialCaseList to specify a list of functions that
5062     // must always return fully initialized values. For now, we hardcode "main".
5063     bool EagerCheck = (MS.EagerChecks && HasNoUndef) || (F.getName() == "main");
5064 
5065     Value *Shadow = getShadow(RetVal);
5066     bool StoreOrigin = true;
5067     if (EagerCheck) {
5068       insertShadowCheck(RetVal, &I);
5069       Shadow = getCleanShadow(RetVal);
5070       StoreOrigin = false;
5071     }
5072 
5073     // The caller may still expect information passed over TLS if we pass our
5074     // check
5075     if (StoreShadow) {
5076       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
5077       if (MS.TrackOrigins && StoreOrigin)
5078         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval());
5079     }
5080   }
5081 
5082   void visitPHINode(PHINode &I) {
5083     IRBuilder<> IRB(&I);
5084     if (!PropagateShadow) {
5085       setShadow(&I, getCleanShadow(&I));
5086       setOrigin(&I, getCleanOrigin());
5087       return;
5088     }
5089 
5090     ShadowPHINodes.push_back(&I);
5091     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
5092                                 "_msphi_s"));
5093     if (MS.TrackOrigins)
5094       setOrigin(
5095           &I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(), "_msphi_o"));
5096   }
5097 
5098   Value *getLocalVarIdptr(AllocaInst &I) {
5099     ConstantInt *IntConst =
5100         ConstantInt::get(Type::getInt32Ty(F.getParent()->getContext()), 0);
5101     return new GlobalVariable(*F.getParent(), IntConst->getType(),
5102                               /*isConstant=*/false, GlobalValue::PrivateLinkage,
5103                               IntConst);
5104   }
5105 
5106   Value *getLocalVarDescription(AllocaInst &I) {
5107     return createPrivateConstGlobalForString(*F.getParent(), I.getName());
5108   }
5109 
5110   void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
5111     if (PoisonStack && ClPoisonStackWithCall) {
5112       IRB.CreateCall(MS.MsanPoisonStackFn, {&I, Len});
5113     } else {
5114       Value *ShadowBase, *OriginBase;
5115       std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
5116           &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true);
5117 
5118       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
5119       IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlign());
5120     }
5121 
5122     if (PoisonStack && MS.TrackOrigins) {
5123       Value *Idptr = getLocalVarIdptr(I);
5124       if (ClPrintStackNames) {
5125         Value *Descr = getLocalVarDescription(I);
5126         IRB.CreateCall(MS.MsanSetAllocaOriginWithDescriptionFn,
5127                        {&I, Len, Idptr, Descr});
5128       } else {
5129         IRB.CreateCall(MS.MsanSetAllocaOriginNoDescriptionFn, {&I, Len, Idptr});
5130       }
5131     }
5132   }
5133 
5134   void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
5135     Value *Descr = getLocalVarDescription(I);
5136     if (PoisonStack) {
5137       IRB.CreateCall(MS.MsanPoisonAllocaFn, {&I, Len, Descr});
5138     } else {
5139       IRB.CreateCall(MS.MsanUnpoisonAllocaFn, {&I, Len});
5140     }
5141   }
5142 
5143   void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
5144     if (!InsPoint)
5145       InsPoint = &I;
5146     NextNodeIRBuilder IRB(InsPoint);
5147     const DataLayout &DL = F.getDataLayout();
5148     TypeSize TS = DL.getTypeAllocSize(I.getAllocatedType());
5149     Value *Len = IRB.CreateTypeSize(MS.IntptrTy, TS);
5150     if (I.isArrayAllocation())
5151       Len = IRB.CreateMul(Len,
5152                           IRB.CreateZExtOrTrunc(I.getArraySize(), MS.IntptrTy));
5153 
5154     if (MS.CompileKernel)
5155       poisonAllocaKmsan(I, IRB, Len);
5156     else
5157       poisonAllocaUserspace(I, IRB, Len);
5158   }
5159 
5160   void visitAllocaInst(AllocaInst &I) {
5161     setShadow(&I, getCleanShadow(&I));
5162     setOrigin(&I, getCleanOrigin());
5163     // We'll get to this alloca later unless it's poisoned at the corresponding
5164     // llvm.lifetime.start.
5165     AllocaSet.insert(&I);
5166   }
5167 
5168   void visitSelectInst(SelectInst &I) {
5169     // a = select b, c, d
5170     Value *B = I.getCondition();
5171     Value *C = I.getTrueValue();
5172     Value *D = I.getFalseValue();
5173 
5174     handleSelectLikeInst(I, B, C, D);
5175   }
5176 
5177   void handleSelectLikeInst(Instruction &I, Value *B, Value *C, Value *D) {
5178     IRBuilder<> IRB(&I);
5179 
5180     Value *Sb = getShadow(B);
5181     Value *Sc = getShadow(C);
5182     Value *Sd = getShadow(D);
5183 
5184     Value *Ob = MS.TrackOrigins ? getOrigin(B) : nullptr;
5185     Value *Oc = MS.TrackOrigins ? getOrigin(C) : nullptr;
5186     Value *Od = MS.TrackOrigins ? getOrigin(D) : nullptr;
5187 
5188     // Result shadow if condition shadow is 0.
5189     Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
5190     Value *Sa1;
5191     if (I.getType()->isAggregateType()) {
5192       // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
5193       // an extra "select". This results in much more compact IR.
5194       // Sa = select Sb, poisoned, (select b, Sc, Sd)
5195       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
5196     } else {
5197       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
5198       // If Sb (condition is poisoned), look for bits in c and d that are equal
5199       // and both unpoisoned.
5200       // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
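      //
      // Small worked example (a sketch): with a poisoned condition, fully
      // clean c = 0b1010 and fully clean d = 0b1011, Sa1 = (c ^ d) | 0 | 0 =
      // 0b0001, so only the low bit of the result is reported uninitialized.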
5201 
5202       // Cast arguments to shadow-compatible type.
5203       C = CreateAppToShadowCast(IRB, C);
5204       D = CreateAppToShadowCast(IRB, D);
5205 
5206       // Result shadow if condition shadow is 1.
5207       Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
5208     }
5209     Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
5210     setShadow(&I, Sa);
5211     if (MS.TrackOrigins) {
5212       // Origins are always i32, so any vector conditions must be flattened.
5213       // FIXME: consider tracking vector origins for app vectors?
5214       if (B->getType()->isVectorTy()) {
5215         B = convertToBool(B, IRB);
5216         Sb = convertToBool(Sb, IRB);
5217       }
5218       // a = select b, c, d
5219       // Oa = Sb ? Ob : (b ? Oc : Od)
5220       setOrigin(&I, IRB.CreateSelect(Sb, Ob, IRB.CreateSelect(B, Oc, Od)));
5221     }
5222   }
5223 
5224   void visitLandingPadInst(LandingPadInst &I) {
5225     // Do nothing.
5226     // See https://github.com/google/sanitizers/issues/504
5227     setShadow(&I, getCleanShadow(&I));
5228     setOrigin(&I, getCleanOrigin());
5229   }
5230 
5231   void visitCatchSwitchInst(CatchSwitchInst &I) {
5232     setShadow(&I, getCleanShadow(&I));
5233     setOrigin(&I, getCleanOrigin());
5234   }
5235 
5236   void visitFuncletPadInst(FuncletPadInst &I) {
5237     setShadow(&I, getCleanShadow(&I));
5238     setOrigin(&I, getCleanOrigin());
5239   }
5240 
5241   void visitGetElementPtrInst(GetElementPtrInst &I) { handleShadowOr(I); }
5242 
5243   void visitExtractValueInst(ExtractValueInst &I) {
5244     IRBuilder<> IRB(&I);
5245     Value *Agg = I.getAggregateOperand();
5246     LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
5247     Value *AggShadow = getShadow(Agg);
5248     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
5249     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
5250     LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
5251     setShadow(&I, ResShadow);
5252     setOriginForNaryOp(I);
5253   }
5254 
5255   void visitInsertValueInst(InsertValueInst &I) {
5256     IRBuilder<> IRB(&I);
5257     LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n");
5258     Value *AggShadow = getShadow(I.getAggregateOperand());
5259     Value *InsShadow = getShadow(I.getInsertedValueOperand());
5260     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
5261     LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
5262     Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
5263     LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n");
5264     setShadow(&I, Res);
5265     setOriginForNaryOp(I);
5266   }
5267 
5268   void dumpInst(Instruction &I) {
5269     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
5270       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
5271     } else {
5272       errs() << "ZZZ " << I.getOpcodeName() << "\n";
5273     }
5274     errs() << "QQQ " << I << "\n";
5275   }
5276 
5277   void visitResumeInst(ResumeInst &I) {
5278     LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
5279     // Nothing to do here.
5280   }
5281 
5282   void visitCleanupReturnInst(CleanupReturnInst &CRI) {
5283     LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
5284     // Nothing to do here.
5285   }
5286 
5287   void visitCatchReturnInst(CatchReturnInst &CRI) {
5288     LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
5289     // Nothing to do here.
5290   }
5291 
5292   void instrumentAsmArgument(Value *Operand, Type *ElemTy, Instruction &I,
5293                              IRBuilder<> &IRB, const DataLayout &DL,
5294                              bool isOutput) {
5295     // For each assembly argument, we check its value for being initialized.
5296     // If the argument is a pointer, we assume it points to a single element
5297     // of the corresponding type (or to an 8-byte word, if the type is unsized).
5298     // Each such pointer is instrumented with a call to the runtime library.
5299     Type *OpType = Operand->getType();
5300     // Check the operand value itself.
5301     insertShadowCheck(Operand, &I);
5302     if (!OpType->isPointerTy() || !isOutput) {
5303       assert(!isOutput);
5304       return;
5305     }
5306     if (!ElemTy->isSized())
5307       return;
5308     auto Size = DL.getTypeStoreSize(ElemTy);
5309     Value *SizeVal = IRB.CreateTypeSize(MS.IntptrTy, Size);
5310     if (MS.CompileKernel) {
5311       IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Operand, SizeVal});
5312     } else {
5313       // ElemTy, derived from elementtype(), does not encode the alignment of
5314       // the pointer. Conservatively assume that the shadow memory is unaligned.
5315       // When Size is large, avoid StoreInst as it would expand to many
5316       // instructions.
5317       auto [ShadowPtr, _] =
5318           getShadowOriginPtrUserspace(Operand, IRB, IRB.getInt8Ty(), Align(1));
5319       if (Size <= 32)
5320         IRB.CreateAlignedStore(getCleanShadow(ElemTy), ShadowPtr, Align(1));
5321       else
5322         IRB.CreateMemSet(ShadowPtr, ConstantInt::getNullValue(IRB.getInt8Ty()),
5323                          SizeVal, Align(1));
5324     }
5325   }
5326 
5327   /// Get the number of output arguments returned by pointers.
5328   int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
5329     int NumRetOutputs = 0;
5330     int NumOutputs = 0;
5331     Type *RetTy = cast<Value>(CB)->getType();
5332     if (!RetTy->isVoidTy()) {
5333       // Register outputs are returned via the CallInst return value.
5334       auto *ST = dyn_cast<StructType>(RetTy);
5335       if (ST)
5336         NumRetOutputs = ST->getNumElements();
5337       else
5338         NumRetOutputs = 1;
5339     }
5340     InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
5341     for (const InlineAsm::ConstraintInfo &Info : Constraints) {
5342       switch (Info.Type) {
5343       case InlineAsm::isOutput:
5344         NumOutputs++;
5345         break;
5346       default:
5347         break;
5348       }
5349     }
5350     return NumOutputs - NumRetOutputs;
5351   }
5352 
5353   void visitAsmInstruction(Instruction &I) {
5354     // Conservative inline assembly handling: check for poisoned shadow of
5355     // asm() arguments, then unpoison the result and all the memory locations
5356     // pointed to by those arguments.
5357     // An inline asm() statement in C++ contains lists of input and output
5358     // arguments used by the assembly code. These are mapped to operands of the
5359     // CallInst as follows:
5360     //  - nR register outputs ("=r") are returned by value in a single structure
5361     //  (SSA value of the CallInst);
5362     //  - nO other outputs ("=m" and others) are returned by pointer as first
5363     // nO operands of the CallInst;
5364     //  - nI inputs ("r", "m" and others) are passed to CallInst as the
5365     // remaining nI operands.
5366     // The total number of asm() arguments in the source is nR+nO+nI, and the
5367     // corresponding CallInst has nO+nI+1 operands (the last operand is the
5368     // function to be called).
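    // Illustrative sketch (not generated by this pass): for
    //   asm("..." : "=r"(r0), "=m"(m0) : "r"(i0));
    // nR = 1 ("=r" is returned as the CallInst result), nO = 1 ("=m" is
    // passed by pointer as the first operand), nI = 1, so the CallInst has
    // nO + nI + 1 = 3 operands: &m0, i0 and the inline asm callee.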
5369     const DataLayout &DL = F.getDataLayout();
5370     CallBase *CB = cast<CallBase>(&I);
5371     IRBuilder<> IRB(&I);
5372     InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
5373     int OutputArgs = getNumOutputArgs(IA, CB);
5374     // The last operand of a CallInst is the function itself.
5375     int NumOperands = CB->getNumOperands() - 1;
5376 
5377     // Check input arguments. Do this before unpoisoning output arguments, so
5378     // that we don't overwrite uninitialized values before checking them.
5379     for (int i = OutputArgs; i < NumOperands; i++) {
5380       Value *Operand = CB->getOperand(i);
5381       instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL,
5382                             /*isOutput*/ false);
5383     }
5384     // Unpoison output arguments. This must happen before the actual InlineAsm
5385     // call, so that the shadow for memory published in the asm() statement
5386     // remains valid.
5387     for (int i = 0; i < OutputArgs; i++) {
5388       Value *Operand = CB->getOperand(i);
5389       instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL,
5390                             /*isOutput*/ true);
5391     }
5392 
5393     setShadow(&I, getCleanShadow(&I));
5394     setOrigin(&I, getCleanOrigin());
5395   }
5396 
5397   void visitFreezeInst(FreezeInst &I) {
5398     // Freeze always returns a fully defined value.
5399     setShadow(&I, getCleanShadow(&I));
5400     setOrigin(&I, getCleanOrigin());
5401   }
5402 
5403   void visitInstruction(Instruction &I) {
5404     // Everything else: stop propagating and check for poisoned shadow.
5405     if (ClDumpStrictInstructions)
5406       dumpInst(I);
5407     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
5408     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
5409       Value *Operand = I.getOperand(i);
5410       if (Operand->getType()->isSized())
5411         insertShadowCheck(Operand, &I);
5412     }
5413     setShadow(&I, getCleanShadow(&I));
5414     setOrigin(&I, getCleanOrigin());
5415   }
5416 };
5417 
5418 struct VarArgHelperBase : public VarArgHelper {
5419   Function &F;
5420   MemorySanitizer &MS;
5421   MemorySanitizerVisitor &MSV;
5422   SmallVector<CallInst *, 16> VAStartInstrumentationList;
5423   const unsigned VAListTagSize;
5424 
5425   VarArgHelperBase(Function &F, MemorySanitizer &MS,
5426                    MemorySanitizerVisitor &MSV, unsigned VAListTagSize)
5427       : F(F), MS(MS), MSV(MSV), VAListTagSize(VAListTagSize) {}
5428 
5429   Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
5430     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5431     return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5432   }
5433 
5434   /// Compute the shadow address for a given va_arg.
5435   Value *getShadowPtrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
5436     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5437     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5438     return IRB.CreateIntToPtr(Base, MS.PtrTy, "_msarg_va_s");
5439   }
5440 
5441   /// Compute the shadow address for a given va_arg.
5442   Value *getShadowPtrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset,
5443                                    unsigned ArgSize) {
5444     // Make sure we don't overflow __msan_va_arg_tls.
5445     if (ArgOffset + ArgSize > kParamTLSSize)
5446       return nullptr;
5447     return getShadowPtrForVAArgument(IRB, ArgOffset);
5448   }
5449 
5450   /// Compute the origin address for a given va_arg.
5451   Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
5452     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
5453     // getOriginPtrForVAArgument() is always called after
5454     // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
5455     // overflow.
5456     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5457     return IRB.CreateIntToPtr(Base, MS.PtrTy, "_msarg_va_o");
5458   }
5459 
5460   void CleanUnusedTLS(IRBuilder<> &IRB, Value *ShadowBase,
5461                       unsigned BaseOffset) {
5462     // The tail of __msan_va_arg_tls is not large enough to fit the full
5463     // value shadow, but it will be copied to the backup anyway. Make it
5464     // clean.
5465     if (BaseOffset >= kParamTLSSize)
5466       return;
5467     Value *TailSize =
5468         ConstantInt::getSigned(IRB.getInt32Ty(), kParamTLSSize - BaseOffset);
5469     IRB.CreateMemSet(ShadowBase, ConstantInt::getNullValue(IRB.getInt8Ty()),
5470                      TailSize, Align(8));
5471   }
5472 
5473   void unpoisonVAListTagForInst(IntrinsicInst &I) {
5474     IRBuilder<> IRB(&I);
5475     Value *VAListTag = I.getArgOperand(0);
5476     const Align Alignment = Align(8);
5477     auto [ShadowPtr, OriginPtr] = MSV.getShadowOriginPtr(
5478         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5479     // Unpoison the whole __va_list_tag.
5480     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5481                      VAListTagSize, Alignment, false);
5482   }
5483 
5484   void visitVAStartInst(VAStartInst &I) override {
5485     if (F.getCallingConv() == CallingConv::Win64)
5486       return;
5487     VAStartInstrumentationList.push_back(&I);
5488     unpoisonVAListTagForInst(I);
5489   }
5490 
5491   void visitVACopyInst(VACopyInst &I) override {
5492     if (F.getCallingConv() == CallingConv::Win64)
5493       return;
5494     unpoisonVAListTagForInst(I);
5495   }
5496 };
5497 
5498 /// AMD64-specific implementation of VarArgHelper.
5499 struct VarArgAMD64Helper : public VarArgHelperBase {
5500   // An unfortunate workaround for asymmetric lowering of va_arg stuff.
5501   // See a comment in visitCallBase for more details.
5502   static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
5503   static const unsigned AMD64FpEndOffsetSSE = 176;
5504   // If SSE is disabled, fp_offset in va_list is zero.
5505   static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
5506 
5507   unsigned AMD64FpEndOffset;
5508   AllocaInst *VAArgTLSCopy = nullptr;
5509   AllocaInst *VAArgTLSOriginCopy = nullptr;
5510   Value *VAArgOverflowSize = nullptr;
5511 
5512   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
5513 
5514   VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
5515                     MemorySanitizerVisitor &MSV)
5516       : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/24) {
5517     AMD64FpEndOffset = AMD64FpEndOffsetSSE;
5518     for (const auto &Attr : F.getAttributes().getFnAttrs()) {
5519       if (Attr.isStringAttribute() &&
5520           (Attr.getKindAsString() == "target-features")) {
5521         if (Attr.getValueAsString().contains("-sse"))
5522           AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
5523         break;
5524       }
5525     }
5526   }
5527 
5528   ArgKind classifyArgument(Value *arg) {
5529     // A very rough approximation of X86_64 argument classification rules.
5530     Type *T = arg->getType();
5531     if (T->isX86_FP80Ty())
5532       return AK_Memory;
5533     if (T->isFPOrFPVectorTy())
5534       return AK_FloatingPoint;
5535     if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
5536       return AK_GeneralPurpose;
5537     if (T->isPointerTy())
5538       return AK_GeneralPurpose;
5539     return AK_Memory;
5540   }
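  // E.g. (illustrative): i32 and pointer arguments classify as
  // AK_GeneralPurpose, double and <4 x float> as AK_FloatingPoint, while
  // x86_fp80 (long double) and i128 fall back to AK_Memory.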
5541 
5542   // For VarArg functions, store the argument shadow in an ABI-specific format
5543   // that corresponds to the va_list layout.
5544   // We do this because Clang lowers va_arg in the frontend, and this pass
5545   // only sees the low-level code that deals with va_list internals.
5546   // A much easier alternative (provided that Clang emitted va_arg
5547   // instructions) would have been to associate each live instance of va_list
5548   // with a copy of MSanParamTLS, and extract the shadow on each va_arg() call
5549   // in argument-list order.
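  // Illustrative sketch based on the constants above: shadow for GP register
  // arguments occupies __msan_va_arg_tls[0, 48), FP register arguments occupy
  // [48, 176) (when SSE is enabled), and everything else spills into the
  // overflow area starting at offset 176. E.g. for a hypothetical call
  // printf(fmt, i, d, s) with i:int, d:double, s:char*, the shadows of i and
  // s land at GP offsets 8 and 16 (fmt, being fixed, only advances the
  // offset), and the shadow of d lands at FP offset 48.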
5550   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5551     unsigned GpOffset = 0;
5552     unsigned FpOffset = AMD64GpEndOffset;
5553     unsigned OverflowOffset = AMD64FpEndOffset;
5554     const DataLayout &DL = F.getDataLayout();
5555 
5556     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5557       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5558       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
5559       if (IsByVal) {
5560         // ByVal arguments always go to the overflow area.
5561         // Fixed arguments passed through the overflow area will be stepped
5562         // over by va_start, so don't count them towards the offset.
5563         if (IsFixed)
5564           continue;
5565         assert(A->getType()->isPointerTy());
5566         Type *RealTy = CB.getParamByValType(ArgNo);
5567         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
5568         uint64_t AlignedSize = alignTo(ArgSize, 8);
5569         unsigned BaseOffset = OverflowOffset;
5570         Value *ShadowBase = getShadowPtrForVAArgument(IRB, OverflowOffset);
5571         Value *OriginBase = nullptr;
5572         if (MS.TrackOrigins)
5573           OriginBase = getOriginPtrForVAArgument(IRB, OverflowOffset);
5574         OverflowOffset += AlignedSize;
5575 
5576         if (OverflowOffset > kParamTLSSize) {
5577           CleanUnusedTLS(IRB, ShadowBase, BaseOffset);
5578           continue; // We have no space to copy shadow there.
5579         }
5580 
5581         Value *ShadowPtr, *OriginPtr;
5582         std::tie(ShadowPtr, OriginPtr) =
5583             MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
5584                                    /*isStore*/ false);
5585         IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
5586                          kShadowTLSAlignment, ArgSize);
5587         if (MS.TrackOrigins)
5588           IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
5589                            kShadowTLSAlignment, ArgSize);
5590       } else {
5591         ArgKind AK = classifyArgument(A);
5592         if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
5593           AK = AK_Memory;
5594         if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
5595           AK = AK_Memory;
5596         Value *ShadowBase, *OriginBase = nullptr;
5597         switch (AK) {
5598         case AK_GeneralPurpose:
5599           ShadowBase = getShadowPtrForVAArgument(IRB, GpOffset);
5600           if (MS.TrackOrigins)
5601             OriginBase = getOriginPtrForVAArgument(IRB, GpOffset);
5602           GpOffset += 8;
5603           assert(GpOffset <= kParamTLSSize);
5604           break;
5605         case AK_FloatingPoint:
5606           ShadowBase = getShadowPtrForVAArgument(IRB, FpOffset);
5607           if (MS.TrackOrigins)
5608             OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
5609           FpOffset += 16;
5610           assert(FpOffset <= kParamTLSSize);
5611           break;
5612         case AK_Memory:
5613           if (IsFixed)
5614             continue;
5615           uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
5616           uint64_t AlignedSize = alignTo(ArgSize, 8);
5617           unsigned BaseOffset = OverflowOffset;
5618           ShadowBase = getShadowPtrForVAArgument(IRB, OverflowOffset);
5619           if (MS.TrackOrigins) {
5620             OriginBase = getOriginPtrForVAArgument(IRB, OverflowOffset);
5621           }
5622           OverflowOffset += AlignedSize;
5623           if (OverflowOffset > kParamTLSSize) {
5624             // We have no space to copy shadow there.
5625             CleanUnusedTLS(IRB, ShadowBase, BaseOffset);
5626             continue;
5627           }
5628         }
5629         // Take fixed arguments into account for GpOffset and FpOffset,
5630         // but don't actually store shadows for them.
5631         // TODO(glider): don't call get*PtrForVAArgument() for them.
5632         if (IsFixed)
5633           continue;
5634         Value *Shadow = MSV.getShadow(A);
5635         IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
5636         if (MS.TrackOrigins) {
5637           Value *Origin = MSV.getOrigin(A);
5638           TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
5639           MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
5640                           std::max(kShadowTLSAlignment, kMinOriginAlignment));
5641         }
5642       }
5643     }
5644     Constant *OverflowSize =
5645         ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
5646     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5647   }
5648 
5649   void finalizeInstrumentation() override {
5650     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5651            "finalizeInstrumentation called twice");
5652     if (!VAStartInstrumentationList.empty()) {
5653       // If there is a va_start in this function, make a backup copy of
5654       // va_arg_tls somewhere in the function entry block.
5655       IRBuilder<> IRB(MSV.FnPrologueEnd);
5656       VAArgOverflowSize =
5657           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5658       Value *CopySize = IRB.CreateAdd(
5659           ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset), VAArgOverflowSize);
5660       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5661       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5662       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5663                        CopySize, kShadowTLSAlignment, false);
5664 
5665       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5666           Intrinsic::umin, CopySize,
5667           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5668       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5669                        kShadowTLSAlignment, SrcSize);
5670       if (MS.TrackOrigins) {
5671         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5672         VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
5673         IRB.CreateMemCpy(VAArgTLSOriginCopy, kShadowTLSAlignment,
5674                          MS.VAArgOriginTLS, kShadowTLSAlignment, SrcSize);
5675       }
5676     }
5677 
5678     // Instrument va_start.
5679     // Copy va_list shadow from the backup copy of the TLS contents.
5680     for (CallInst *OrigInst : VAStartInstrumentationList) {
5681       NextNodeIRBuilder IRB(OrigInst);
5682       Value *VAListTag = OrigInst->getArgOperand(0);
5683 
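      // For reference (SysV AMD64 ABI, not defined by this pass): va_list is
      // { i32 gp_offset; i32 fp_offset; ptr overflow_arg_area;
      //   ptr reg_save_area }, so reg_save_area is read at byte offset 16 and
      // overflow_arg_area at byte offset 8 of the tag below.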
5684       Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
5685           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5686                         ConstantInt::get(MS.IntptrTy, 16)),
5687           MS.PtrTy);
5688       Value *RegSaveAreaPtr = IRB.CreateLoad(MS.PtrTy, RegSaveAreaPtrPtr);
5689       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5690       const Align Alignment = Align(16);
5691       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5692           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5693                                  Alignment, /*isStore*/ true);
5694       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5695                        AMD64FpEndOffset);
5696       if (MS.TrackOrigins)
5697         IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
5698                          Alignment, AMD64FpEndOffset);
5699       Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
5700           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5701                         ConstantInt::get(MS.IntptrTy, 8)),
5702           MS.PtrTy);
5703       Value *OverflowArgAreaPtr =
5704           IRB.CreateLoad(MS.PtrTy, OverflowArgAreaPtrPtr);
5705       Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
5706       std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
5707           MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
5708                                  Alignment, /*isStore*/ true);
5709       Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
5710                                              AMD64FpEndOffset);
5711       IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
5712                        VAArgOverflowSize);
5713       if (MS.TrackOrigins) {
5714         SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
5715                                         AMD64FpEndOffset);
5716         IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
5717                          VAArgOverflowSize);
5718       }
5719     }
5720   }
5721 };
5722 
5723 /// AArch64-specific implementation of VarArgHelper.
5724 struct VarArgAArch64Helper : public VarArgHelperBase {
5725   static const unsigned kAArch64GrArgSize = 64;
5726   static const unsigned kAArch64VrArgSize = 128;
5727 
5728   static const unsigned AArch64GrBegOffset = 0;
5729   static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
5730   // Make VR space aligned to 16 bytes.
5731   static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
5732   static const unsigned AArch64VrEndOffset =
5733       AArch64VrBegOffset + kAArch64VrArgSize;
5734   static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
5735 
5736   AllocaInst *VAArgTLSCopy = nullptr;
5737   Value *VAArgOverflowSize = nullptr;
5738 
5739   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
5740 
5741   VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
5742                       MemorySanitizerVisitor &MSV)
5743       : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/32) {}
5744 
5745   // A very rough approximation of aarch64 argument classification rules.
5746   std::pair<ArgKind, uint64_t> classifyArgument(Type *T) {
5747     if (T->isIntOrPtrTy() && T->getPrimitiveSizeInBits() <= 64)
5748       return {AK_GeneralPurpose, 1};
5749     if (T->isFloatingPointTy() && T->getPrimitiveSizeInBits() <= 128)
5750       return {AK_FloatingPoint, 1};
5751 
5752     if (T->isArrayTy()) {
5753       auto R = classifyArgument(T->getArrayElementType());
5754       R.second *= T->getScalarType()->getArrayNumElements();
5755       return R;
5756     }
5757 
5758     if (const FixedVectorType *FV = dyn_cast<FixedVectorType>(T)) {
5759       auto R = classifyArgument(FV->getScalarType());
5760       R.second *= FV->getNumElements();
5761       return R;
5762     }
5763 
5764     LLVM_DEBUG(errs() << "Unknown vararg type: " << *T << "\n");
5765     return {AK_Memory, 0};
5766   }
5767 
5768   // The instrumentation stores the argument shadow in a non ABI-specific
5769   // format because it does not know which arguments are named (Clang, as in
5770   // the x86_64 case, lowers va_arg in the frontend, and this pass only sees
5771   // the low-level code that deals with va_list internals).
5772   // The first eight GR registers are saved in the first 64 bytes of the
5773   // va_arg TLS array, followed by the eight FP/SIMD registers, and then
5774   // the remaining arguments.
5775   // Using constant offsets within the va_arg TLS array allows fast copying
5776   // in finalizeInstrumentation().
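  // Illustrative layout derived from the constants above: GR shadow occupies
  // __msan_va_arg_tls[0, 64), VR shadow occupies [64, 192), and the overflow
  // (stack) arguments start at offset 192.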
5777   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5778     unsigned GrOffset = AArch64GrBegOffset;
5779     unsigned VrOffset = AArch64VrBegOffset;
5780     unsigned OverflowOffset = AArch64VAEndOffset;
5781 
5782     const DataLayout &DL = F.getDataLayout();
5783     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5784       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5785       auto [AK, RegNum] = classifyArgument(A->getType());
5786       if (AK == AK_GeneralPurpose &&
5787           (GrOffset + RegNum * 8) > AArch64GrEndOffset)
5788         AK = AK_Memory;
5789       if (AK == AK_FloatingPoint &&
5790           (VrOffset + RegNum * 16) > AArch64VrEndOffset)
5791         AK = AK_Memory;
5792       Value *Base;
5793       switch (AK) {
5794       case AK_GeneralPurpose:
5795         Base = getShadowPtrForVAArgument(IRB, GrOffset);
5796         GrOffset += 8 * RegNum;
5797         break;
5798       case AK_FloatingPoint:
5799         Base = getShadowPtrForVAArgument(IRB, VrOffset);
5800         VrOffset += 16 * RegNum;
5801         break;
5802       case AK_Memory:
5803         // Don't count fixed arguments in the overflow area - va_start will
5804         // skip right over them.
5805         if (IsFixed)
5806           continue;
5807         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
5808         uint64_t AlignedSize = alignTo(ArgSize, 8);
5809         unsigned BaseOffset = OverflowOffset;
5810         Base = getShadowPtrForVAArgument(IRB, BaseOffset);
5811         OverflowOffset += AlignedSize;
5812         if (OverflowOffset > kParamTLSSize) {
5813           // We have no space to copy shadow there.
5814           CleanUnusedTLS(IRB, Base, BaseOffset);
5815           continue;
5816         }
5817         break;
5818       }
5819       // Count Gp/Vr fixed arguments to their respective offsets, but don't
5820       // bother to actually store a shadow.
5821       if (IsFixed)
5822         continue;
5823       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
5824     }
5825     Constant *OverflowSize =
5826         ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
5827     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5828   }
5829 
5830   // Retrieve a va_list field of 'void*' size.
5831   Value *getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
5832     Value *SaveAreaPtrPtr = IRB.CreateIntToPtr(
5833         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5834                       ConstantInt::get(MS.IntptrTy, offset)),
5835         MS.PtrTy);
5836     return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
5837   }
5838 
5839   // Retrieve a va_list field of 'int' size.
5840   Value *getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
5841     Value *SaveAreaPtr = IRB.CreateIntToPtr(
5842         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5843                       ConstantInt::get(MS.IntptrTy, offset)),
5844         MS.PtrTy);
5845     Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
5846     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
5847   }
5848 
5849   void finalizeInstrumentation() override {
5850     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5851            "finalizeInstrumentation called twice");
5852     if (!VAStartInstrumentationList.empty()) {
5853       // If there is a va_start in this function, make a backup copy of
5854       // va_arg_tls somewhere in the function entry block.
5855       IRBuilder<> IRB(MSV.FnPrologueEnd);
5856       VAArgOverflowSize =
5857           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5858       Value *CopySize = IRB.CreateAdd(
5859           ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset), VAArgOverflowSize);
5860       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5861       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5862       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5863                        CopySize, kShadowTLSAlignment, false);
5864 
5865       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5866           Intrinsic::umin, CopySize,
5867           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5868       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5869                        kShadowTLSAlignment, SrcSize);
5870     }
5871 
5872     Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
5873     Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
5874 
5875     // Instrument va_start, copy va_list shadow from the backup copy of
5876     // the TLS contents.
5877     for (CallInst *OrigInst : VAStartInstrumentationList) {
5878       NextNodeIRBuilder IRB(OrigInst);
5879 
5880       Value *VAListTag = OrigInst->getArgOperand(0);
5881 
5882       // The variadic ABI for AArch64 creates two areas to save the incoming
5883       // argument registers (one for the 64-bit general registers x0-x7 and
5884       // another for the 128-bit FP/SIMD registers v0-v7).
5885       // We then need to propagate the shadow arguments to both regions,
5886       // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
5887       // The remaining arguments are saved in the shadow of 'va::stack'.
5888       // One caveat is that only the unnamed arguments need to be propagated,
5889       // but the call-site instrumentation saved 'all' the arguments.
5890       // So, to copy the shadow values from the va_arg TLS array,
5891       // we need to adjust the offset for both GR and VR fields based on
5892       // the __{gr,vr}_offs value (since those values are set based on the
5893       // incoming named arguments).
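      // For reference (AAPCS64, not defined by this pass), the va_list tag is
      // { ptr __stack; ptr __gr_top; ptr __vr_top; i32 __gr_offs;
      //   i32 __vr_offs }, which is why the fields below are read at byte
      // offsets 0, 8, 16, 24 and 28.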
5894       Type *RegSaveAreaPtrTy = IRB.getPtrTy();
5895 
5896       // Read the stack pointer from the va_list.
5897       Value *StackSaveAreaPtr =
5898           IRB.CreateIntToPtr(getVAField64(IRB, VAListTag, 0), RegSaveAreaPtrTy);
5899 
5900       // Read both the __gr_top and __gr_off and add them up.
5901       Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
5902       Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
5903 
5904       Value *GrRegSaveAreaPtr = IRB.CreateIntToPtr(
5905           IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea), RegSaveAreaPtrTy);
5906 
5907       // Read both the __vr_top and __vr_off and add them up.
5908       Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
5909       Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
5910 
5911       Value *VrRegSaveAreaPtr = IRB.CreateIntToPtr(
5912           IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea), RegSaveAreaPtrTy);
5913 
5914       // The instrumentation does not know how many named arguments are being
5915       // used, and at the call site all the arguments were saved. Since __gr_offs
5916       // is defined as '0 - ((8 - named_gr) * 8)', the idea is to propagate only
5917       // the variadic arguments by skipping the shadow bytes of named arguments.
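      // Worked example (illustrative): with two named GP arguments,
      // __gr_offs == -(8 - 2) * 8 == -48, so GrRegSaveAreaShadowPtrOff ==
      // 64 + (-48) == 16; the first 16 bytes of GR shadow (the named
      // arguments) are skipped and GrCopySize == 64 - 16 == 48 bytes are
      // copied.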
5918       Value *GrRegSaveAreaShadowPtrOff =
5919           IRB.CreateAdd(GrArgSize, GrOffSaveArea);
5920 
5921       Value *GrRegSaveAreaShadowPtr =
5922           MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5923                                  Align(8), /*isStore*/ true)
5924               .first;
5925 
5926       Value *GrSrcPtr =
5927           IRB.CreateInBoundsPtrAdd(VAArgTLSCopy, GrRegSaveAreaShadowPtrOff);
5928       Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
5929 
5930       IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
5931                        GrCopySize);
5932 
5933       // Again, but for FP/SIMD values.
5934       Value *VrRegSaveAreaShadowPtrOff =
5935           IRB.CreateAdd(VrArgSize, VrOffSaveArea);
5936 
5937       Value *VrRegSaveAreaShadowPtr =
5938           MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5939                                  Align(8), /*isStore*/ true)
5940               .first;
5941 
5942       Value *VrSrcPtr = IRB.CreateInBoundsPtrAdd(
5943           IRB.CreateInBoundsPtrAdd(VAArgTLSCopy,
5944                                    IRB.getInt32(AArch64VrBegOffset)),
5945           VrRegSaveAreaShadowPtrOff);
5946       Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
5947 
5948       IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8),
5949                        VrCopySize);
5950 
5951       // And finally for remaining arguments.
5952       Value *StackSaveAreaShadowPtr =
5953           MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
5954                                  Align(16), /*isStore*/ true)
5955               .first;
5956 
5957       Value *StackSrcPtr = IRB.CreateInBoundsPtrAdd(
5958           VAArgTLSCopy, IRB.getInt32(AArch64VAEndOffset));
5959 
5960       IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr,
5961                        Align(16), VAArgOverflowSize);
5962     }
5963   }
5964 };
5965 
5966 /// PowerPC-specific implementation of VarArgHelper.
5967 struct VarArgPowerPCHelper : public VarArgHelperBase {
5968   AllocaInst *VAArgTLSCopy = nullptr;
5969   Value *VAArgSize = nullptr;
5970 
5971   VarArgPowerPCHelper(Function &F, MemorySanitizer &MS,
5972                       MemorySanitizerVisitor &MSV, unsigned VAListTagSize)
5973       : VarArgHelperBase(F, MS, MSV, VAListTagSize) {}
5974 
5975   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5976     // For PowerPC, we need to deal with the alignment of stack arguments -
5977     // they are mostly aligned to 8 bytes, but vectors and i128 arrays
5978     // are aligned to 16 bytes, and byvals can be aligned to 8 or 16 bytes.
5979     // For that reason, we compute the current offset from the stack pointer
5980     // (which is always properly aligned) and the offset of the first vararg,
5981     // then subtract them.
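    // E.g. (illustrative): a <4 x i32> vararg has ArgSize == 16 and is
    // naturally aligned, so VAArgOffset is rounded up to a multiple of 16
    // before its shadow slot is computed.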
5982     unsigned VAArgBase;
5983     Triple TargetTriple(F.getParent()->getTargetTriple());
5984     // The parameter save area starts 48 bytes from the frame pointer for ABIv1
5985     // and 32 bytes for ABIv2.  The ABI version is usually determined by target
5986     // endianness, but in theory it could be overridden by a function attribute.
5987     if (TargetTriple.isPPC64()) {
5988       if (TargetTriple.isPPC64ELFv2ABI())
5989         VAArgBase = 32;
5990       else
5991         VAArgBase = 48;
5992     } else {
5993       // Parameter save area is 8 bytes from frame pointer in PPC32
5994       VAArgBase = 8;
5995     }
5996     unsigned VAArgOffset = VAArgBase;
5997     const DataLayout &DL = F.getDataLayout();
5998     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5999       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
6000       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
6001       if (IsByVal) {
6002         assert(A->getType()->isPointerTy());
6003         Type *RealTy = CB.getParamByValType(ArgNo);
6004         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
6005         Align ArgAlign = CB.getParamAlign(ArgNo).value_or(Align(8));
6006         if (ArgAlign < 8)
6007           ArgAlign = Align(8);
6008         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
6009         if (!IsFixed) {
6010           Value *Base =
6011               getShadowPtrForVAArgument(IRB, VAArgOffset - VAArgBase, ArgSize);
6012           if (Base) {
6013             Value *AShadowPtr, *AOriginPtr;
6014             std::tie(AShadowPtr, AOriginPtr) =
6015                 MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
6016                                        kShadowTLSAlignment, /*isStore*/ false);
6017 
6018             IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
6019                              kShadowTLSAlignment, ArgSize);
6020           }
6021         }
6022         VAArgOffset += alignTo(ArgSize, Align(8));
6023       } else {
6024         Value *Base;
6025         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
6026         Align ArgAlign = Align(8);
6027         if (A->getType()->isArrayTy()) {
6028           // Arrays are aligned to element size, except for long double
6029           // arrays, which are aligned to 8 bytes.
6030           Type *ElementTy = A->getType()->getArrayElementType();
6031           if (!ElementTy->isPPC_FP128Ty())
6032             ArgAlign = Align(DL.getTypeAllocSize(ElementTy));
6033         } else if (A->getType()->isVectorTy()) {
6034           // Vectors are naturally aligned.
6035           ArgAlign = Align(ArgSize);
6036         }
6037         if (ArgAlign < 8)
6038           ArgAlign = Align(8);
6039         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
6040         if (DL.isBigEndian()) {
6041           // Adjust the shadow for arguments with size < 8 to match the
6042           // placement of bits on big-endian systems.
6043           if (ArgSize < 8)
6044             VAArgOffset += (8 - ArgSize);
6045         }
6046         if (!IsFixed) {
6047           Base =
6048               getShadowPtrForVAArgument(IRB, VAArgOffset - VAArgBase, ArgSize);
6049           if (Base)
6050             IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
6051         }
6052         VAArgOffset += ArgSize;
6053         VAArgOffset = alignTo(VAArgOffset, Align(8));
6054       }
6055       if (IsFixed)
6056         VAArgBase = VAArgOffset;
6057     }
6058 
6059     Constant *TotalVAArgSize =
6060         ConstantInt::get(MS.IntptrTy, VAArgOffset - VAArgBase);
6061     // We reuse VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a new
6062     // class member; here it holds the total size of all varargs.
6063     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
6064   }
6065 
6066   void finalizeInstrumentation() override {
6067     assert(!VAArgSize && !VAArgTLSCopy &&
6068            "finalizeInstrumentation called twice");
6069     IRBuilder<> IRB(MSV.FnPrologueEnd);
6070     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
6071     Value *CopySize = VAArgSize;
6072 
6073     if (!VAStartInstrumentationList.empty()) {
6074       // If there is a va_start in this function, make a backup copy of
6075       // va_arg_tls somewhere in the function entry block.
6076 
6077       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
6078       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
6079       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
6080                        CopySize, kShadowTLSAlignment, false);
6081 
6082       Value *SrcSize = IRB.CreateBinaryIntrinsic(
6083           Intrinsic::umin, CopySize,
6084           ConstantInt::get(IRB.getInt64Ty(), kParamTLSSize));
6085       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
6086                        kShadowTLSAlignment, SrcSize);
6087     }
6088 
6089     // Instrument va_start.
6090     // Copy va_list shadow from the backup copy of the TLS contents.
6091     Triple TargetTriple(F.getParent()->getTargetTriple());
6092     for (CallInst *OrigInst : VAStartInstrumentationList) {
6093       NextNodeIRBuilder IRB(OrigInst);
6094       Value *VAListTag = OrigInst->getArgOperand(0);
6095       Value *RegSaveAreaPtrPtr = IRB.CreatePtrToInt(VAListTag, MS.IntptrTy);
6096 
6097       // In PPC32 va_list_tag is a struct, whereas in PPC64 it's a pointer
6098       if (!TargetTriple.isPPC64()) {
6099         RegSaveAreaPtrPtr =
6100             IRB.CreateAdd(RegSaveAreaPtrPtr, ConstantInt::get(MS.IntptrTy, 8));
6101       }
6102       RegSaveAreaPtrPtr = IRB.CreateIntToPtr(RegSaveAreaPtrPtr, MS.PtrTy);
6103 
6104       Value *RegSaveAreaPtr = IRB.CreateLoad(MS.PtrTy, RegSaveAreaPtrPtr);
6105       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
6106       const DataLayout &DL = F.getDataLayout();
6107       unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
6108       const Align Alignment = Align(IntptrSize);
6109       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
6110           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
6111                                  Alignment, /*isStore*/ true);
6112       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
6113                        CopySize);
6114     }
6115   }
6116 };
6117 
6118 /// SystemZ-specific implementation of VarArgHelper.
6119 struct VarArgSystemZHelper : public VarArgHelperBase {
6120   static const unsigned SystemZGpOffset = 16;
6121   static const unsigned SystemZGpEndOffset = 56;
6122   static const unsigned SystemZFpOffset = 128;
6123   static const unsigned SystemZFpEndOffset = 160;
6124   static const unsigned SystemZMaxVrArgs = 8;
6125   static const unsigned SystemZRegSaveAreaSize = 160;
6126   static const unsigned SystemZOverflowOffset = 160;
6127   static const unsigned SystemZVAListTagSize = 32;
6128   static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
6129   static const unsigned SystemZRegSaveAreaPtrOffset = 24;
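  // For reference (s390x ABI, not defined by this pass): the va_list tag is
  // { i64 __gpr; i64 __fpr; ptr __overflow_arg_area; ptr __reg_save_area },
  // so the two pointer offsets above (16 and 24) index its last two fields.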
6130 
6131   bool IsSoftFloatABI;
6132   AllocaInst *VAArgTLSCopy = nullptr;
6133   AllocaInst *VAArgTLSOriginCopy = nullptr;
6134   Value *VAArgOverflowSize = nullptr;
6135 
6136   enum class ArgKind {
6137     GeneralPurpose,
6138     FloatingPoint,
6139     Vector,
6140     Memory,
6141     Indirect,
6142   };
6143 
6144   enum class ShadowExtension { None, Zero, Sign };
6145 
6146   VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
6147                       MemorySanitizerVisitor &MSV)
6148       : VarArgHelperBase(F, MS, MSV, SystemZVAListTagSize),
6149         IsSoftFloatABI(F.getFnAttribute("use-soft-float").getValueAsBool()) {}
6150 
6151   ArgKind classifyArgument(Type *T) {
6152     // T is a SystemZABIInfo::classifyArgumentType() output, and there are
6153     // only a few possibilities of what it can be. In particular, enums, single
6154     // element structs and large types have already been taken care of.
6155 
6156     // Some i128 and fp128 arguments are converted to pointers only in the
6157     // back end.
6158     if (T->isIntegerTy(128) || T->isFP128Ty())
6159       return ArgKind::Indirect;
6160     if (T->isFloatingPointTy())
6161       return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
6162     if (T->isIntegerTy() || T->isPointerTy())
6163       return ArgKind::GeneralPurpose;
6164     if (T->isVectorTy())
6165       return ArgKind::Vector;
6166     return ArgKind::Memory;
6167   }
6168 
6169   ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
6170     // ABI says: "One of the simple integer types no more than 64 bits wide.
6171     // ... If such an argument is shorter than 64 bits, replace it by a full
6172     // 64-bit integer representing the same number, using sign or zero
6173     // extension". Shadow for an integer argument has the same type as the
6174     // argument itself, so it can be sign or zero extended as well.
6175     bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt);
6176     bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt);
6177     if (ZExt) {
6178       assert(!SExt);
6179       return ShadowExtension::Zero;
6180     }
6181     if (SExt) {
6182       assert(!ZExt);
6183       return ShadowExtension::Sign;
6184     }
6185     return ShadowExtension::None;
6186   }
6187 
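  // Illustrative example: an i32 vararg with the 'signext' attribute gets a
  // shadow sign-extended to 64 bits and stored at GpOffset; without an
  // extension attribute, GapSize == 8 - 4 == 4 and the 4-byte shadow is
  // stored at GpOffset + 4, matching where the value sits in the big-endian
  // 8-byte slot.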
6188   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
6189     unsigned GpOffset = SystemZGpOffset;
6190     unsigned FpOffset = SystemZFpOffset;
6191     unsigned VrIndex = 0;
6192     unsigned OverflowOffset = SystemZOverflowOffset;
6193     const DataLayout &DL = F.getDataLayout();
6194     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
6195       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
6196       // SystemZABIInfo does not produce ByVal parameters.
6197       assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
6198       Type *T = A->getType();
6199       ArgKind AK = classifyArgument(T);
6200       if (AK == ArgKind::Indirect) {
6201         T = MS.PtrTy;
6202         AK = ArgKind::GeneralPurpose;
6203       }
6204       if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
6205         AK = ArgKind::Memory;
6206       if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
6207         AK = ArgKind::Memory;
6208       if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
6209         AK = ArgKind::Memory;
6210       Value *ShadowBase = nullptr;
6211       Value *OriginBase = nullptr;
6212       ShadowExtension SE = ShadowExtension::None;
6213       switch (AK) {
6214       case ArgKind::GeneralPurpose: {
6215         // Always keep track of GpOffset, but store shadow only for varargs.
6216         uint64_t ArgSize = 8;
6217         if (GpOffset + ArgSize <= kParamTLSSize) {
6218           if (!IsFixed) {
6219             SE = getShadowExtension(CB, ArgNo);
6220             uint64_t GapSize = 0;
6221             if (SE == ShadowExtension::None) {
6222               uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
6223               assert(ArgAllocSize <= ArgSize);
6224               GapSize = ArgSize - ArgAllocSize;
6225             }
6226             ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize);
6227             if (MS.TrackOrigins)
6228               OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize);
6229           }
6230           GpOffset += ArgSize;
6231         } else {
6232           GpOffset = kParamTLSSize;
6233         }
6234         break;
6235       }
6236       case ArgKind::FloatingPoint: {
6237         // Always keep track of FpOffset, but store shadow only for varargs.
6238         uint64_t ArgSize = 8;
6239         if (FpOffset + ArgSize <= kParamTLSSize) {
6240           if (!IsFixed) {
6241             // PoP says: "A short floating-point datum requires only the
6242             // left-most 32 bit positions of a floating-point register".
6243             // Therefore, in contrast to AK_GeneralPurpose and AK_Memory,
6244             // don't extend shadow and don't mind the gap.
6245             ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset);
6246             if (MS.TrackOrigins)
6247               OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
6248           }
6249           FpOffset += ArgSize;
6250         } else {
6251           FpOffset = kParamTLSSize;
6252         }
6253         break;
6254       }
6255       case ArgKind::Vector: {
6256         // Keep track of VrIndex. No need to store shadow, since vector varargs
6257         // go through AK_Memory.
6258         assert(IsFixed);
6259         VrIndex++;
6260         break;
6261       }
6262       case ArgKind::Memory: {
6263         // Keep track of OverflowOffset and store shadow only for varargs.
6264         // Ignore fixed args, since we need to copy only the vararg portion of
6265         // the overflow area shadow.
6266         if (!IsFixed) {
6267           uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
6268           uint64_t ArgSize = alignTo(ArgAllocSize, 8);
6269           if (OverflowOffset + ArgSize <= kParamTLSSize) {
6270             SE = getShadowExtension(CB, ArgNo);
6271             uint64_t GapSize =
6272                 SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
6273             ShadowBase =
6274                 getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize);
6275             if (MS.TrackOrigins)
6276               OriginBase =
6277                   getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize);
6278             OverflowOffset += ArgSize;
6279           } else {
6280             OverflowOffset = kParamTLSSize;
6281           }
6282         }
6283         break;
6284       }
6285       case ArgKind::Indirect:
6286         llvm_unreachable("Indirect must be converted to GeneralPurpose");
6287       }
6288       if (ShadowBase == nullptr)
6289         continue;
6290       Value *Shadow = MSV.getShadow(A);
6291       if (SE != ShadowExtension::None)
6292         Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(),
6293                                       /*Signed*/ SE == ShadowExtension::Sign);
6294       ShadowBase = IRB.CreateIntToPtr(ShadowBase, MS.PtrTy, "_msarg_va_s");
6295       IRB.CreateStore(Shadow, ShadowBase);
6296       if (MS.TrackOrigins) {
6297         Value *Origin = MSV.getOrigin(A);
6298         TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
6299         MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
6300                         kMinOriginAlignment);
6301       }
6302     }
6303     Constant *OverflowSize = ConstantInt::get(
6304         IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset);
6305     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
6306   }
6307 
6308   void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
6309     Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
6310         IRB.CreateAdd(
6311             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
6312             ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)),
6313         MS.PtrTy);
6314     Value *RegSaveAreaPtr = IRB.CreateLoad(MS.PtrTy, RegSaveAreaPtrPtr);
6315     Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
6316     const Align Alignment = Align(8);
6317     std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
6318         MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
6319                                /*isStore*/ true);
6320     // TODO(iii): copy only fragments filled by visitCallBase()
6321     // TODO(iii): support packed-stack && !use-soft-float
6322     // For use-soft-float functions, it is enough to copy just the GPRs.
6323     unsigned RegSaveAreaSize =
6324         IsSoftFloatABI ? SystemZGpEndOffset : SystemZRegSaveAreaSize;
6325     IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
6326                      RegSaveAreaSize);
6327     if (MS.TrackOrigins)
6328       IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
6329                        Alignment, RegSaveAreaSize);
6330   }
6331 
6332   // FIXME: This implementation limits OverflowOffset to kParamTLSSize, so we
6333   // don't know real overflow size and can't clear shadow beyond kParamTLSSize.
6334   void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
6335     Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
6336         IRB.CreateAdd(
6337             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
6338             ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)),
6339         MS.PtrTy);
6340     Value *OverflowArgAreaPtr = IRB.CreateLoad(MS.PtrTy, OverflowArgAreaPtrPtr);
6341     Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
6342     const Align Alignment = Align(8);
6343     std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
6344         MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
6345                                Alignment, /*isStore*/ true);
6346     Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
6347                                            SystemZOverflowOffset);
6348     IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
6349                      VAArgOverflowSize);
6350     if (MS.TrackOrigins) {
6351       SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
6352                                       SystemZOverflowOffset);
6353       IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
6354                        VAArgOverflowSize);
6355     }
6356   }
6357 
6358   void finalizeInstrumentation() override {
6359     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
6360            "finalizeInstrumentation called twice");
6361     if (!VAStartInstrumentationList.empty()) {
6362       // If there is a va_start in this function, make a backup copy of
6363       // va_arg_tls somewhere in the function entry block.
6364       IRBuilder<> IRB(MSV.FnPrologueEnd);
6365       VAArgOverflowSize =
6366           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
6367       Value *CopySize =
6368           IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
6369                         VAArgOverflowSize);
6370       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
6371       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
6372       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
6373                        CopySize, kShadowTLSAlignment, false);
6374 
6375       Value *SrcSize = IRB.CreateBinaryIntrinsic(
6376           Intrinsic::umin, CopySize,
6377           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
6378       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
6379                        kShadowTLSAlignment, SrcSize);
6380       if (MS.TrackOrigins) {
6381         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
6382         VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
6383         IRB.CreateMemCpy(VAArgTLSOriginCopy, kShadowTLSAlignment,
6384                          MS.VAArgOriginTLS, kShadowTLSAlignment, SrcSize);
6385       }
6386     }
6387 
6388     // Instrument va_start.
6389     // Copy va_list shadow from the backup copy of the TLS contents.
6390     for (CallInst *OrigInst : VAStartInstrumentationList) {
6391       NextNodeIRBuilder IRB(OrigInst);
6392       Value *VAListTag = OrigInst->getArgOperand(0);
6393       copyRegSaveArea(IRB, VAListTag);
6394       copyOverflowArea(IRB, VAListTag);
6395     }
6396   }
6397 };
6398 
6399 /// i386-specific implementation of VarArgHelper.
6400 struct VarArgI386Helper : public VarArgHelperBase {
6401   AllocaInst *VAArgTLSCopy = nullptr;
6402   Value *VAArgSize = nullptr;
6403 
6404   VarArgI386Helper(Function &F, MemorySanitizer &MS,
6405                    MemorySanitizerVisitor &MSV)
6406       : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/4) {}
6407 
6408   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
6409     const DataLayout &DL = F.getDataLayout();
6410     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
6411     unsigned VAArgOffset = 0;
6412     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
6413       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
6414       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
6415       if (IsByVal) {
6416         assert(A->getType()->isPointerTy());
6417         Type *RealTy = CB.getParamByValType(ArgNo);
6418         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
6419         Align ArgAlign = CB.getParamAlign(ArgNo).value_or(Align(IntptrSize));
6420         if (ArgAlign < IntptrSize)
6421           ArgAlign = Align(IntptrSize);
6422         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
6423         if (!IsFixed) {
6424           Value *Base = getShadowPtrForVAArgument(IRB, VAArgOffset, ArgSize);
6425           if (Base) {
6426             Value *AShadowPtr, *AOriginPtr;
6427             std::tie(AShadowPtr, AOriginPtr) =
6428                 MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
6429                                        kShadowTLSAlignment, /*isStore*/ false);
6430 
6431             IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
6432                              kShadowTLSAlignment, ArgSize);
6433           }
6434           VAArgOffset += alignTo(ArgSize, Align(IntptrSize));
6435         }
6436       } else {
6437         Value *Base;
6438         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
6439         Align ArgAlign = Align(IntptrSize);
6440         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
6441         if (DL.isBigEndian()) {
6442           // Adjust the shadow for arguments with size < IntptrSize to match
6443           // the placement of bits on big-endian systems.
6444           if (ArgSize < IntptrSize)
6445             VAArgOffset += (IntptrSize - ArgSize);
6446         }
6447         if (!IsFixed) {
6448           Base = getShadowPtrForVAArgument(IRB, VAArgOffset, ArgSize);
6449           if (Base)
6450             IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
6451           VAArgOffset += ArgSize;
6452           VAArgOffset = alignTo(VAArgOffset, Align(IntptrSize));
6453         }
6454       }
6455     }
6456 
6457     Constant *TotalVAArgSize = ConstantInt::get(MS.IntptrTy, VAArgOffset);
6458     // We reuse VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a new
6459     // class member; here it holds the total size of all varargs.
6460     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
6461   }
6462 
6463   void finalizeInstrumentation() override {
6464     assert(!VAArgSize && !VAArgTLSCopy &&
6465            "finalizeInstrumentation called twice");
6466     IRBuilder<> IRB(MSV.FnPrologueEnd);
6467     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
6468     Value *CopySize = VAArgSize;
6469 
6470     if (!VAStartInstrumentationList.empty()) {
6471       // If there is a va_start in this function, make a backup copy of
6472       // va_arg_tls somewhere in the function entry block.
6473       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
6474       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
6475       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
6476                        CopySize, kShadowTLSAlignment, false);
6477 
6478       Value *SrcSize = IRB.CreateBinaryIntrinsic(
6479           Intrinsic::umin, CopySize,
6480           ConstantInt::get(IRB.getInt64Ty(), kParamTLSSize));
6481       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
6482                        kShadowTLSAlignment, SrcSize);
6483     }
6484 
6485     // Instrument va_start.
6486     // Copy va_list shadow from the backup copy of the TLS contents.
6487     for (CallInst *OrigInst : VAStartInstrumentationList) {
6488       NextNodeIRBuilder IRB(OrigInst);
6489       Value *VAListTag = OrigInst->getArgOperand(0);
6490       Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C);
6491       Value *RegSaveAreaPtrPtr =
6492           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
6493                              PointerType::get(*MS.C, 0));
6494       Value *RegSaveAreaPtr =
6495           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
6496       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
6497       const DataLayout &DL = F.getDataLayout();
6498       unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
6499       const Align Alignment = Align(IntptrSize);
6500       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
6501           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
6502                                  Alignment, /*isStore*/ true);
6503       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
6504                        CopySize);
6505     }
6506   }
6507 };
6508 
6509 /// Implementation of VarArgHelper that is used for ARM32, MIPS, RISCV,
6510 /// LoongArch64.
6511 struct VarArgGenericHelper : public VarArgHelperBase {
6512   AllocaInst *VAArgTLSCopy = nullptr;
6513   Value *VAArgSize = nullptr;
6514 
6515   VarArgGenericHelper(Function &F, MemorySanitizer &MS,
6516                       MemorySanitizerVisitor &MSV, const unsigned VAListTagSize)
6517       : VarArgHelperBase(F, MS, MSV, VAListTagSize) {}
6518 
6519   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
6520     unsigned VAArgOffset = 0;
6521     const DataLayout &DL = F.getDataLayout();
6522     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
6523     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
6524       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
6525       if (IsFixed)
6526         continue;
6527       uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
6528       if (DL.isBigEndian()) {
6529         // Adjust the shadow for arguments with size < IntptrSize to match the
6530         // placement of bits on big-endian systems.
6531         if (ArgSize < IntptrSize)
6532           VAArgOffset += (IntptrSize - ArgSize);
6533       }
6534       Value *Base = getShadowPtrForVAArgument(IRB, VAArgOffset, ArgSize);
6535       VAArgOffset += ArgSize;
6536       VAArgOffset = alignTo(VAArgOffset, IntptrSize);
6537       if (!Base)
6538         continue;
6539       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
6540     }
6541 
6542     Constant *TotalVAArgSize = ConstantInt::get(MS.IntptrTy, VAArgOffset);
6543     // We reuse VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a new
6544     // class member; here it holds the total size of all varargs.
6545     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
6546   }
6547 
6548   void finalizeInstrumentation() override {
6549     assert(!VAArgSize && !VAArgTLSCopy &&
6550            "finalizeInstrumentation called twice");
6551     IRBuilder<> IRB(MSV.FnPrologueEnd);
6552     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
6553     Value *CopySize = VAArgSize;
6554 
6555     if (!VAStartInstrumentationList.empty()) {
6556       // If there is a va_start in this function, make a backup copy of
6557       // va_arg_tls somewhere in the function entry block.
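      // (Calls made between the prologue and va_start would otherwise clobber
      // va_arg_tls with the shadow of their own arguments.)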
6558       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
6559       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
6560       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
6561                        CopySize, kShadowTLSAlignment, false);
6562 
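      // Cap the copy at kParamTLSSize so the memcpy cannot read past the end
      // of the fixed-size va_arg TLS buffer.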
6563       Value *SrcSize = IRB.CreateBinaryIntrinsic(
6564           Intrinsic::umin, CopySize,
6565           ConstantInt::get(IRB.getInt64Ty(), kParamTLSSize));
6566       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
6567                        kShadowTLSAlignment, SrcSize);
6568     }
6569 
6570     // Instrument va_start.
6571     // Copy va_list shadow from the backup copy of the TLS contents.
6572     for (CallInst *OrigInst : VAStartInstrumentationList) {
6573       NextNodeIRBuilder IRB(OrigInst);
6574       Value *VAListTag = OrigInst->getArgOperand(0);
6575       Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C);
6576       Value *RegSaveAreaPtrPtr =
6577           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
6578                              PointerType::get(*MS.C, 0));
6579       Value *RegSaveAreaPtr =
6580           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
6581       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
6582       const DataLayout &DL = F.getDataLayout();
6583       unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
6584       const Align Alignment = Align(IntptrSize);
6585       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
6586           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
6587                                  Alignment, /*isStore*/ true);
6588       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
6589                        CopySize);
6590     }
6591   }
6592 };
6593 
6594 // ARM32, LoongArch64, MIPS, and RISCV share the same calling conventions
6595 // regarding VAArgs.
6596 using VarArgARM32Helper = VarArgGenericHelper;
6597 using VarArgRISCVHelper = VarArgGenericHelper;
6598 using VarArgMIPSHelper = VarArgGenericHelper;
6599 using VarArgLoongArch64Helper = VarArgGenericHelper;
6600 
6601 /// A no-op implementation of VarArgHelper.
6602 struct VarArgNoOpHelper : public VarArgHelper {
6603   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
6604                    MemorySanitizerVisitor &MSV) {}
6605 
6606   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}
6607 
6608   void visitVAStartInst(VAStartInst &I) override {}
6609 
6610   void visitVACopyInst(VACopyInst &I) override {}
6611 
6612   void finalizeInstrumentation() override {}
6613 };
6614 
6615 } // end anonymous namespace
6616 
6617 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
6618                                         MemorySanitizerVisitor &Visitor) {
6619   // VarArg handling is only implemented for the targets listed below. Other
6620   // platforms fall back to VarArgNoOpHelper, so false positives are possible.
6621   Triple TargetTriple(Func.getParent()->getTargetTriple());
6622 
6623   if (TargetTriple.getArch() == Triple::x86)
6624     return new VarArgI386Helper(Func, Msan, Visitor);
6625 
6626   if (TargetTriple.getArch() == Triple::x86_64)
6627     return new VarArgAMD64Helper(Func, Msan, Visitor);
6628 
6629   if (TargetTriple.isARM())
6630     return new VarArgARM32Helper(Func, Msan, Visitor, /*VAListTagSize=*/4);
6631 
6632   if (TargetTriple.isAArch64())
6633     return new VarArgAArch64Helper(Func, Msan, Visitor);
6634 
6635   if (TargetTriple.isSystemZ())
6636     return new VarArgSystemZHelper(Func, Msan, Visitor);
6637 
6638   // On PowerPC32 VAListTag is a struct
6639   // {char, char, i16 padding, char *, char *}
6640   if (TargetTriple.isPPC32())
6641     return new VarArgPowerPCHelper(Func, Msan, Visitor, /*VAListTagSize=*/12);
6642 
6643   if (TargetTriple.isPPC64())
6644     return new VarArgPowerPCHelper(Func, Msan, Visitor, /*VAListTagSize=*/8);
6645 
6646   if (TargetTriple.isRISCV32())
6647     return new VarArgRISCVHelper(Func, Msan, Visitor, /*VAListTagSize=*/4);
6648 
6649   if (TargetTriple.isRISCV64())
6650     return new VarArgRISCVHelper(Func, Msan, Visitor, /*VAListTagSize=*/8);
6651 
6652   if (TargetTriple.isMIPS32())
6653     return new VarArgMIPSHelper(Func, Msan, Visitor, /*VAListTagSize=*/4);
6654 
6655   if (TargetTriple.isMIPS64())
6656     return new VarArgMIPSHelper(Func, Msan, Visitor, /*VAListTagSize=*/8);
6657 
6658   if (TargetTriple.isLoongArch64())
6659     return new VarArgLoongArch64Helper(Func, Msan, Visitor,
6660                                        /*VAListTagSize=*/8);
6661 
6662   return new VarArgNoOpHelper(Func, Msan, Visitor);
6663 }
6664 
6665 bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
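  // Skip the MSan module constructor itself (userspace builds only) and
  // functions that explicitly opt out of sanitizer instrumentation.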
6666   if (!CompileKernel && F.getName() == kMsanModuleCtorName)
6667     return false;
6668 
6669   if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
6670     return false;
6671 
6672   MemorySanitizerVisitor Visitor(F, *this, TLI);
6673 
6674   // Clear out memory attributes.
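  // Instrumentation inserts shadow loads, stores, and runtime calls, so the
  // function's original memory effects no longer hold and it is no longer
  // safe to execute speculatively.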
6675   AttributeMask B;
6676   B.addAttribute(Attribute::Memory).addAttribute(Attribute::Speculatable);
6677   F.removeFnAttrs(B);
6678 
6679   return Visitor.runOnFunction();
6680 }
6681